Diffstat (limited to 'deps/openssl/openssl/crypto/bn/asm')
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/README | 27
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl | 65
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl | 690
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/armv8-mont.pl | 1510
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/bn-586.pl | 15
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/bn-c64xplus.asm | 382
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl | 160
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/co-586.pl | 13
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl | 13
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ia64.S | 15
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips-mont.pl | 11
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips.pl | 13
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips3-mont.pl | 327
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips3.s | 2201
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/pa-risc2.s | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s | 7
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl | 11
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl | 9
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc.pl | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl | 9
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl | 11
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl | 15
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl | 11
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl | 11
-rw-r--r-- [-rwxr-xr-x]  deps/openssl/openssl/crypto/bn/asm/s390x.S | 10
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv8.S | 10
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S | 18
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl | 26
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl | 31
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/via-mont.pl | 14
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl | 29
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/vms.mar | 6440
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl | 14
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86-mont.pl | 20
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86.pl | 28
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/add.pl | 76
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/comba.pl | 277
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/div.pl | 15
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/f | 3
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/mul.pl | 77
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/mul_add.pl | 87
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/sqr.pl | 60
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86/sub.pl | 76
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c | 23
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl | 11
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl | 15
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl | 15
50 files changed, 2845 insertions, 10110 deletions
diff --git a/deps/openssl/openssl/crypto/bn/asm/README b/deps/openssl/openssl/crypto/bn/asm/README
deleted file mode 100644
index b0f3a68a06..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/README
+++ /dev/null
@@ -1,27 +0,0 @@
-<OBSOLETE>
-
-All assember in this directory are just version of the file
-crypto/bn/bn_asm.c.
-
-Quite a few of these files are just the assember output from gcc since on
-quite a few machines they are 2 times faster than the system compiler.
-
-For the x86, I have hand written assember because of the bad job all
-compilers seem to do on it. This normally gives a 2 time speed up in the RSA
-routines.
-
-For the DEC alpha, I also hand wrote the assember (except the division which
-is just the output from the C compiler pasted on the end of the file).
-On the 2 alpha C compilers I had access to, it was not possible to do
-64b x 64b -> 128b calculations (both long and the long long data types
-were 64 bits). So the hand assember gives access to the 128 bit result and
-a 2 times speedup :-).
-
-There are 3 versions of assember for the HP PA-RISC.
-
-pa-risc.s is the origional one which works fine and generated using gcc :-)
-
-pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations
-by Chris Ruemmler from HP (with some help from the HP C compiler).
-
-</OBSOLETE>
diff --git a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
index 03596e2014..1d68d6d072 100644
--- a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -15,6 +22,9 @@
# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
# difference.
+$output=pop;
+open STDOUT,">$output";
+
# int bn_mul_mont(
$rp="a0"; # BN_ULONG *rp,
$ap="a1"; # const BN_ULONG *ap,
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
index 72381a7724..0bb5433075 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -32,14 +39,31 @@
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$code=<<___;
#include "arm_arch.h"
.text
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
___
################
# private interface to mul_1x1_ialu
@@ -120,11 +144,17 @@ mul_1x1_ialu:
eor $hi,$hi,$t0,lsr#8
ldr $t0,[sp,$i0] @ tab[b >> 30 ]
+#ifdef __thumb2__
+ itt ne
+#endif
eorne $lo,$lo,$b,lsl#30
eorne $hi,$hi,$b,lsr#2
tst $a,#1<<31
eor $lo,$lo,$t1,lsl#27
eor $hi,$hi,$t1,lsr#5
+#ifdef __thumb2__
+ itt ne
+#endif
eorne $lo,$lo,$b,lsl#31
eorne $hi,$hi,$b,lsr#1
eor $lo,$lo,$t0,lsl#30
@@ -144,20 +174,33 @@ $code.=<<___;
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
+ stmdb sp!,{r10,lr}
ldr r12,.LOPENSSL_armcap
-.Lpic: ldr r12,[pc,r12]
- tst r12,#1
+ adr r10,.LOPENSSL_armcap
+ ldr r12,[r12,r10]
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV7_NEON
+ itt ne
+ ldrne r10,[sp],#8
bne .LNEON
+ stmdb sp!,{r4-r9}
+#else
+ stmdb sp!,{r4-r10,lr}
#endif
___
$ret="r10"; # reassigned 1st argument
$code.=<<___;
- stmdb sp!,{r4-r10,lr}
mov $ret,r0 @ reassign 1st argument
mov $b,r3 @ $b=b1
+ sub r7,sp,#36
+ mov r8,sp
+ and r7,r7,#-32
ldr r3,[sp,#32] @ load b0
mov $mask,#7<<2
- sub sp,sp,#32 @ allocate tab[8]
+ mov sp,r7 @ allocate tab[8]
+ str r8,[r7,#32]
bl mul_1x1_ialu @ a1·b1
str $lo,[$ret,#8]
@@ -181,6 +224,7 @@ ___
$code.=<<___;
ldmia $ret,{@r[0]-@r[3]}
eor $lo,$lo,$hi
+ ldr sp,[sp,#32] @ destroy tab[8]
eor $hi,$hi,@r[1]
eor $lo,$lo,@r[0]
eor $hi,$hi,@r[2]
@@ -188,7 +232,6 @@ $code.=<<___;
eor $hi,$hi,@r[3]
str $hi,[$ret,#8]
eor $lo,$lo,$hi
- add sp,sp,#32 @ destroy tab[8]
str $lo,[$ret,#4]
#if __ARM_ARCH__>=5
@@ -213,8 +256,8 @@ $code.=<<___;
.align 5
.LNEON:
ldr r12, [sp] @ 5th argument
- vmov.32 $a, r2, r1
- vmov.32 $b, r12, r3
+ vmov $a, r2, r1
+ vmov $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
@@ -267,7 +310,7 @@ $code.=<<___;
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-(.Lpic+8)
+.word OPENSSL_armcap_P-.
#endif
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
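
For orientation: mul_1x1_ialu above computes a 32x32 carry-less product with a windowed scheme, an on-stack tab[8] of precomputed multiples indexed by 3 bits of one operand at a time, plus the eorne fixups for the top bits of the other operand, and bn_GF2m_mul_2x2 combines three such 1x1 products Karatsuba-style, as the "a1·b1" comment hints. A bit-by-bit plain-Perl sketch of the same arithmetic, assuming a 64-bit perl:

    use strict;

    # carry-less (GF(2) polynomial) product of two 32-bit words
    sub clmul32 {
        my ($a, $b) = @_;
        my $r = 0;
        for my $i (0 .. 31) {
            $r ^= $b << $i if ($a >> $i) & 1;   # XOR in place of ADD
        }
        return $r;                              # up to 63 significant bits
    }

    # 64x64 product from three 32x32 products (Karatsuba over GF(2))
    sub gf2m_mul_2x2 {
        my ($a1, $a0, $b1, $b0) = @_;
        my $hh = clmul32($a1, $b1);                          # a1*b1
        my $ll = clmul32($a0, $b0);                          # a0*b0
        my $mm = clmul32($a1 ^ $a0, $b1 ^ $b0) ^ $hh ^ $ll;  # middle term
        return ( $hh >> 32,                                  # r[3]
                 ($hh & 0xffffffff) ^ ($mm >> 32),           # r[2]
                 ($mm & 0xffffffff) ^ ($ll >> 32),           # r[1]
                 $ll & 0xffffffff );                         # r[0]
    }

    printf "%08x %08x %08x %08x\n",
           gf2m_mul_2x2(0x87654321, 0x0f0f0f0f, 0xdeadbeef, 0x01234567);
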
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
index 1d330e9f8a..0dc4fe95e4 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -38,8 +45,29 @@
# for execution on all NEON-capable processors, because gain on
# others outweighs the marginal loss on Cortex-A9.
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+# September 2015
+#
+# Align Cortex-A9 performance with November 2013 improvements, i.e.
+# NEON code is now ~20-105% faster than integer-only one on this
+# processor. But this optimization further improved performance even
+# on other processors: NEON code path is ~45-180% faster than original
+# integer-only on Cortex-A8, ~10-210% on Cortex-A15, ~70-450% on
+# Snapdragon S4.
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$num="r0"; # starts as num argument, but holds &tp[num-1]
$ap="r1";
@@ -70,12 +98,17 @@ $code=<<___;
#include "arm_arch.h"
.text
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-bn_mul_mont
+.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.global bn_mul_mont
@@ -83,15 +116,19 @@ $code=<<___;
.align 5
bn_mul_mont:
+.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
- adr r0,bn_mul_mont
+ adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
- tst r0,#1 @ NEON available?
+#ifdef __APPLE__
+ ldr r0,[r0]
+#endif
+ tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
@@ -101,6 +138,9 @@ bn_mul_mont:
#endif
cmp ip,#2
mov $num,ip @ load num
+#ifdef __thumb2__
+ ittt lt
+#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
@@ -148,10 +188,11 @@ bn_mul_mont:
ldr $n0,[$_n0] @ restore n0
adc $nhi,$nhi,#0
str $nlo,[$num] @ tp[num-1]=
+ mov $tj,sp
str $nhi,[$num,#4] @ tp[num]=
.Louter:
- sub $tj,$num,sp @ "original" $num-1 value
+ sub $tj,$num,$tj @ "original" $num-1 value
sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
ldr $bi,[$tp,#4]! @ *(++bp)
sub $np,$np,$tj @ "rewind" np to &np[1]
@@ -196,11 +237,16 @@ bn_mul_mont:
str $nhi,[$num,#4] @ tp[num]=
cmp $tp,$tj
+#ifdef __thumb2__
+ itt ne
+#endif
+ movne $tj,sp
bne .Louter
ldr $rp,[$_rp] @ pull rp
+ mov $aj,sp
add $num,$num,#4 @ $num to point at &tp[num]
- sub $aj,$num,sp @ "original" num value
+ sub $aj,$num,$aj @ "original" num value
mov $tp,sp @ "rewind" $tp
mov $ap,$tp @ "borrow" $ap
sub $np,$np,$aj @ "rewind" $np to &np[0]
@@ -226,7 +272,8 @@ bn_mul_mont:
cmp $tp,$num
bne .Lcopy
- add sp,$num,#4 @ skip over tp[num+1]
+ mov sp,$num
+ add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
@@ -241,19 +288,16 @@ bn_mul_mont:
.size bn_mul_mont,.-bn_mul_mont
___
{
-sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
-sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
-
my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
my ($Z,$Temp)=("q4","q5");
-my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
+my @ACC=map("q$_",(6..13));
my ($Bi,$Ni,$M0)=map("d$_",(28..31));
-my $zero=&Dlo($Z);
-my $temp=&Dlo($Temp);
+my $zero="$Z#lo";
+my $temp="$Temp#lo";
my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
-my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
+my ($tinptr,$toutptr,$inner,$outer,$bnptr)=map("r$_",(6..11));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
@@ -267,60 +311,60 @@ bn_mul8x_mont_neon:
stmdb sp!,{r4-r11}
vstmdb sp!,{d8-d15} @ ABI specification says so
ldmia ip,{r4-r5} @ load rest of parameter block
+ mov ip,sp
+
+ cmp $num,#8
+ bhi .LNEON_8n
+
+ @ special case for $num==8, everything is in register bank...
- sub $toutptr,sp,#16
vld1.32 {${Bi}[0]}, [$bptr,:32]!
- sub $toutptr,$toutptr,$num,lsl#4
+ veor $zero,$zero,$zero
+ sub $toutptr,sp,$num,lsl#4
vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-(
and $toutptr,$toutptr,#-64
vld1.32 {${M0}[0]}, [$n0,:32]
mov sp,$toutptr @ alloca
- veor $zero,$zero,$zero
- subs $inner,$num,#8
vzip.16 $Bi,$zero
- vmull.u32 $A0xB,$Bi,${A0}[0]
- vmull.u32 $A1xB,$Bi,${A0}[1]
- vmull.u32 $A2xB,$Bi,${A1}[0]
- vshl.i64 $temp,`&Dhi("$A0xB")`,#16
- vmull.u32 $A3xB,$Bi,${A1}[1]
+ vmull.u32 @ACC[0],$Bi,${A0}[0]
+ vmull.u32 @ACC[1],$Bi,${A0}[1]
+ vmull.u32 @ACC[2],$Bi,${A1}[0]
+ vshl.i64 $Ni,@ACC[0]#hi,#16
+ vmull.u32 @ACC[3],$Bi,${A1}[1]
- vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
+ vadd.u64 $Ni,$Ni,@ACC[0]#lo
veor $zero,$zero,$zero
- vmul.u32 $Ni,$temp,$M0
+ vmul.u32 $Ni,$Ni,$M0
- vmull.u32 $A4xB,$Bi,${A2}[0]
+ vmull.u32 @ACC[4],$Bi,${A2}[0]
vld1.32 {$N0-$N3}, [$nptr]!
- vmull.u32 $A5xB,$Bi,${A2}[1]
- vmull.u32 $A6xB,$Bi,${A3}[0]
+ vmull.u32 @ACC[5],$Bi,${A2}[1]
+ vmull.u32 @ACC[6],$Bi,${A3}[0]
vzip.16 $Ni,$zero
- vmull.u32 $A7xB,$Bi,${A3}[1]
-
- bne .LNEON_1st
-
- @ special case for num=8, everything is in register bank...
+ vmull.u32 @ACC[7],$Bi,${A3}[1]
- vmlal.u32 $A0xB,$Ni,${N0}[0]
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
sub $outer,$num,#1
- vmlal.u32 $A1xB,$Ni,${N0}[1]
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vmov $Temp,$A0xB
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vmov $A0xB,$A1xB
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vmov $A1xB,$A2xB
- vmlal.u32 $A7xB,$Ni,${N3}[1]
- vmov $A2xB,$A3xB
- vmov $A3xB,$A4xB
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vmov $Temp,@ACC[0]
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vmov @ACC[0],@ACC[1]
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vmov @ACC[1],@ACC[2]
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+ vmov @ACC[2],@ACC[3]
+ vmov @ACC[3],@ACC[4]
vshr.u64 $temp,$temp,#16
- vmov $A4xB,$A5xB
- vmov $A5xB,$A6xB
- vadd.u64 $temp,$temp,`&Dhi("$Temp")`
- vmov $A6xB,$A7xB
- veor $A7xB,$A7xB
+ vmov @ACC[4],@ACC[5]
+ vmov @ACC[5],@ACC[6]
+ vadd.u64 $temp,$temp,$Temp#hi
+ vmov @ACC[6],@ACC[7]
+ veor @ACC[7],@ACC[7]
vshr.u64 $temp,$temp,#16
b .LNEON_outer8
@@ -330,279 +374,302 @@ bn_mul8x_mont_neon:
vld1.32 {${Bi}[0]}, [$bptr,:32]!
veor $zero,$zero,$zero
vzip.16 $Bi,$zero
- vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+ vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
- vmlal.u32 $A0xB,$Bi,${A0}[0]
- vmlal.u32 $A1xB,$Bi,${A0}[1]
- vmlal.u32 $A2xB,$Bi,${A1}[0]
- vshl.i64 $temp,`&Dhi("$A0xB")`,#16
- vmlal.u32 $A3xB,$Bi,${A1}[1]
+ vmlal.u32 @ACC[0],$Bi,${A0}[0]
+ vmlal.u32 @ACC[1],$Bi,${A0}[1]
+ vmlal.u32 @ACC[2],$Bi,${A1}[0]
+ vshl.i64 $Ni,@ACC[0]#hi,#16
+ vmlal.u32 @ACC[3],$Bi,${A1}[1]
- vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
+ vadd.u64 $Ni,$Ni,@ACC[0]#lo
veor $zero,$zero,$zero
subs $outer,$outer,#1
- vmul.u32 $Ni,$temp,$M0
+ vmul.u32 $Ni,$Ni,$M0
- vmlal.u32 $A4xB,$Bi,${A2}[0]
- vmlal.u32 $A5xB,$Bi,${A2}[1]
- vmlal.u32 $A6xB,$Bi,${A3}[0]
+ vmlal.u32 @ACC[4],$Bi,${A2}[0]
+ vmlal.u32 @ACC[5],$Bi,${A2}[1]
+ vmlal.u32 @ACC[6],$Bi,${A3}[0]
vzip.16 $Ni,$zero
- vmlal.u32 $A7xB,$Bi,${A3}[1]
-
- vmlal.u32 $A0xB,$Ni,${N0}[0]
- vmlal.u32 $A1xB,$Ni,${N0}[1]
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vmov $Temp,$A0xB
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vmov $A0xB,$A1xB
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vmov $A1xB,$A2xB
- vmlal.u32 $A7xB,$Ni,${N3}[1]
- vmov $A2xB,$A3xB
- vmov $A3xB,$A4xB
+ vmlal.u32 @ACC[7],$Bi,${A3}[1]
+
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vmov $Temp,@ACC[0]
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vmov @ACC[0],@ACC[1]
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vmov @ACC[1],@ACC[2]
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+ vmov @ACC[2],@ACC[3]
+ vmov @ACC[3],@ACC[4]
vshr.u64 $temp,$temp,#16
- vmov $A4xB,$A5xB
- vmov $A5xB,$A6xB
- vadd.u64 $temp,$temp,`&Dhi("$Temp")`
- vmov $A6xB,$A7xB
- veor $A7xB,$A7xB
+ vmov @ACC[4],@ACC[5]
+ vmov @ACC[5],@ACC[6]
+ vadd.u64 $temp,$temp,$Temp#hi
+ vmov @ACC[6],@ACC[7]
+ veor @ACC[7],@ACC[7]
vshr.u64 $temp,$temp,#16
bne .LNEON_outer8
- vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+ vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
mov $toutptr,sp
- vshr.u64 $temp,`&Dlo("$A0xB")`,#16
+ vshr.u64 $temp,@ACC[0]#lo,#16
mov $inner,$num
- vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
- add $tinptr,sp,#16
- vshr.u64 $temp,`&Dhi("$A0xB")`,#16
- vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
+ vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
+ add $tinptr,sp,#96
+ vshr.u64 $temp,@ACC[0]#hi,#16
+ vzip.16 @ACC[0]#lo,@ACC[0]#hi
- b .LNEON_tail2
+ b .LNEON_tail_entry
.align 4
-.LNEON_1st:
- vmlal.u32 $A0xB,$Ni,${N0}[0]
- vld1.32 {$A0-$A3}, [$aptr]!
- vmlal.u32 $A1xB,$Ni,${N0}[1]
+.LNEON_8n:
+ veor @ACC[0],@ACC[0],@ACC[0]
+ sub $toutptr,sp,#128
+ veor @ACC[1],@ACC[1],@ACC[1]
+ sub $toutptr,$toutptr,$num,lsl#4
+ veor @ACC[2],@ACC[2],@ACC[2]
+ and $toutptr,$toutptr,#-64
+ veor @ACC[3],@ACC[3],@ACC[3]
+ mov sp,$toutptr @ alloca
+ veor @ACC[4],@ACC[4],@ACC[4]
+ add $toutptr,$toutptr,#256
+ veor @ACC[5],@ACC[5],@ACC[5]
+ sub $inner,$num,#8
+ veor @ACC[6],@ACC[6],@ACC[6]
+ veor @ACC[7],@ACC[7],@ACC[7]
+
+.LNEON_8n_init:
+ vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]!
subs $inner,$inner,#8
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vld1.32 {$N0-$N1}, [$nptr]!
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vmlal.u32 $A7xB,$Ni,${N3}[1]
- vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
-
- vmull.u32 $A0xB,$Bi,${A0}[0]
- vld1.32 {$N2-$N3}, [$nptr]!
- vmull.u32 $A1xB,$Bi,${A0}[1]
- vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
- vmull.u32 $A2xB,$Bi,${A1}[0]
- vmull.u32 $A3xB,$Bi,${A1}[1]
- vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
-
- vmull.u32 $A4xB,$Bi,${A2}[0]
- vmull.u32 $A5xB,$Bi,${A2}[1]
- vmull.u32 $A6xB,$Bi,${A3}[0]
- vmull.u32 $A7xB,$Bi,${A3}[1]
-
- bne .LNEON_1st
-
- vmlal.u32 $A0xB,$Ni,${N0}[0]
- add $tinptr,sp,#16
- vmlal.u32 $A1xB,$Ni,${N0}[1]
- sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vld1.64 {$Temp}, [sp,:128]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
- sub $outer,$num,#1
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vshr.u64 $temp,$temp,#16
- vld1.64 {$A0xB}, [$tinptr, :128]!
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
- vmlal.u32 $A7xB,$Ni,${N3}[1]
-
- vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
- vadd.u64 $temp,$temp,`&Dhi("$Temp")`
- veor $Z,$Z,$Z
- vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
- vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
- vst1.64 {$Z}, [$toutptr,:128]
- vshr.u64 $temp,$temp,#16
-
- b .LNEON_outer
+ vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]!
+ vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]!
+ vst1.64 {@ACC[6]-@ACC[7]},[$toutptr,:256]!
+ bne .LNEON_8n_init
+
+ add $tinptr,sp,#256
+ vld1.32 {$A0-$A3},[$aptr]!
+ add $bnptr,sp,#8
+ vld1.32 {${M0}[0]},[$n0,:32]
+ mov $outer,$num
+ b .LNEON_8n_outer
.align 4
-.LNEON_outer:
- vld1.32 {${Bi}[0]}, [$bptr,:32]!
- sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
- vld1.32 {$A0-$A3}, [$aptr]!
+.LNEON_8n_outer:
+ vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++
veor $zero,$zero,$zero
- mov $toutptr,sp
vzip.16 $Bi,$zero
+ add $toutptr,sp,#128
+ vld1.32 {$N0-$N3},[$nptr]!
+
+ vmlal.u32 @ACC[0],$Bi,${A0}[0]
+ vmlal.u32 @ACC[1],$Bi,${A0}[1]
+ veor $zero,$zero,$zero
+ vmlal.u32 @ACC[2],$Bi,${A1}[0]
+ vshl.i64 $Ni,@ACC[0]#hi,#16
+ vmlal.u32 @ACC[3],$Bi,${A1}[1]
+ vadd.u64 $Ni,$Ni,@ACC[0]#lo
+ vmlal.u32 @ACC[4],$Bi,${A2}[0]
+ vmul.u32 $Ni,$Ni,$M0
+ vmlal.u32 @ACC[5],$Bi,${A2}[1]
+ vst1.32 {$Bi},[sp,:64] @ put aside smashed b[8*i+0]
+ vmlal.u32 @ACC[6],$Bi,${A3}[0]
+ vzip.16 $Ni,$zero
+ vmlal.u32 @ACC[7],$Bi,${A3}[1]
+___
+for ($i=0; $i<7;) {
+$code.=<<___;
+ vld1.32 {${Bi}[0]},[$bptr,:32]! @ *b++
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
+ veor $temp,$temp,$temp
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vzip.16 $Bi,$temp
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+ vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
+ vst1.32 {$Ni},[$bnptr,:64]! @ put aside smashed m[8*i+$i]
+___
+ push(@ACC,shift(@ACC)); $i++;
+$code.=<<___;
+ vmlal.u32 @ACC[0],$Bi,${A0}[0]
+ vld1.64 {@ACC[7]},[$tinptr,:128]!
+ vmlal.u32 @ACC[1],$Bi,${A0}[1]
+ veor $zero,$zero,$zero
+ vmlal.u32 @ACC[2],$Bi,${A1}[0]
+ vshl.i64 $Ni,@ACC[0]#hi,#16
+ vmlal.u32 @ACC[3],$Bi,${A1}[1]
+ vadd.u64 $Ni,$Ni,@ACC[0]#lo
+ vmlal.u32 @ACC[4],$Bi,${A2}[0]
+ vmul.u32 $Ni,$Ni,$M0
+ vmlal.u32 @ACC[5],$Bi,${A2}[1]
+ vst1.32 {$Bi},[$bnptr,:64]! @ put aside smashed b[8*i+$i]
+ vmlal.u32 @ACC[6],$Bi,${A3}[0]
+ vzip.16 $Ni,$zero
+ vmlal.u32 @ACC[7],$Bi,${A3}[1]
+___
+}
+$code.=<<___;
+ vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0]
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
+ vld1.32 {$A0-$A3},[$aptr]!
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vadd.u64 @ACC[0]#lo,@ACC[0]#lo,@ACC[0]#hi
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vshr.u64 @ACC[0]#lo,@ACC[0]#lo,#16
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+ vadd.u64 @ACC[1]#lo,@ACC[1]#lo,@ACC[0]#lo
+ vst1.32 {$Ni},[$bnptr,:64] @ put aside smashed m[8*i+$i]
+ add $bnptr,sp,#8 @ rewind
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
sub $inner,$num,#8
- vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
-
- vmlal.u32 $A0xB,$Bi,${A0}[0]
- vld1.64 {$A3xB-$A4xB},[$tinptr,:256]!
- vmlal.u32 $A1xB,$Bi,${A0}[1]
- vmlal.u32 $A2xB,$Bi,${A1}[0]
- vld1.64 {$A5xB-$A6xB},[$tinptr,:256]!
- vmlal.u32 $A3xB,$Bi,${A1}[1]
+ b .LNEON_8n_inner
- vshl.i64 $temp,`&Dhi("$A0xB")`,#16
- veor $zero,$zero,$zero
- vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
- vld1.64 {$A7xB},[$tinptr,:128]!
- vmul.u32 $Ni,$temp,$M0
-
- vmlal.u32 $A4xB,$Bi,${A2}[0]
- vld1.32 {$N0-$N3}, [$nptr]!
- vmlal.u32 $A5xB,$Bi,${A2}[1]
- vmlal.u32 $A6xB,$Bi,${A3}[0]
- vzip.16 $Ni,$zero
- vmlal.u32 $A7xB,$Bi,${A3}[1]
-
-.LNEON_inner:
- vmlal.u32 $A0xB,$Ni,${N0}[0]
- vld1.32 {$A0-$A3}, [$aptr]!
- vmlal.u32 $A1xB,$Ni,${N0}[1]
- subs $inner,$inner,#8
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
- vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vld1.64 {$A0xB}, [$tinptr, :128]!
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
- vmlal.u32 $A7xB,$Ni,${N3}[1]
- vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
-
- vmlal.u32 $A0xB,$Bi,${A0}[0]
- vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
- vmlal.u32 $A1xB,$Bi,${A0}[1]
- vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
- vmlal.u32 $A2xB,$Bi,${A1}[0]
- vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
- vmlal.u32 $A3xB,$Bi,${A1}[1]
- vld1.32 {$N0-$N3}, [$nptr]!
-
- vmlal.u32 $A4xB,$Bi,${A2}[0]
- vld1.64 {$A7xB}, [$tinptr, :128]!
- vmlal.u32 $A5xB,$Bi,${A2}[1]
- vmlal.u32 $A6xB,$Bi,${A3}[0]
- vmlal.u32 $A7xB,$Bi,${A3}[1]
-
- bne .LNEON_inner
-
- vmlal.u32 $A0xB,$Ni,${N0}[0]
- add $tinptr,sp,#16
- vmlal.u32 $A1xB,$Ni,${N0}[1]
- sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
- vmlal.u32 $A2xB,$Ni,${N1}[0]
- vld1.64 {$Temp}, [sp,:128]
- vmlal.u32 $A3xB,$Ni,${N1}[1]
- subs $outer,$outer,#1
-
- vmlal.u32 $A4xB,$Ni,${N2}[0]
- vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
- vmlal.u32 $A5xB,$Ni,${N2}[1]
- vld1.64 {$A0xB}, [$tinptr, :128]!
- vshr.u64 $temp,$temp,#16
- vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
- vmlal.u32 $A6xB,$Ni,${N3}[0]
- vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
- vmlal.u32 $A7xB,$Ni,${N3}[1]
-
- vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
- vadd.u64 $temp,$temp,`&Dhi("$Temp")`
- vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
- vshr.u64 $temp,$temp,#16
-
- bne .LNEON_outer
+.align 4
+.LNEON_8n_inner:
+ subs $inner,$inner,#8
+ vmlal.u32 @ACC[0],$Bi,${A0}[0]
+ vld1.64 {@ACC[7]},[$tinptr,:128]
+ vmlal.u32 @ACC[1],$Bi,${A0}[1]
+ vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+0]
+ vmlal.u32 @ACC[2],$Bi,${A1}[0]
+ vld1.32 {$N0-$N3},[$nptr]!
+ vmlal.u32 @ACC[3],$Bi,${A1}[1]
+ it ne
+ addne $tinptr,$tinptr,#16 @ don't advance in last iteration
+ vmlal.u32 @ACC[4],$Bi,${A2}[0]
+ vmlal.u32 @ACC[5],$Bi,${A2}[1]
+ vmlal.u32 @ACC[6],$Bi,${A3}[0]
+ vmlal.u32 @ACC[7],$Bi,${A3}[1]
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+ vld1.32 {$Bi},[$bnptr,:64]! @ pull smashed b[8*i+$i]
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+ vst1.64 {@ACC[0]},[$toutptr,:128]!
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ vmlal.u32 @ACC[0],$Bi,${A0}[0]
+ vld1.64 {@ACC[7]},[$tinptr,:128]
+ vmlal.u32 @ACC[1],$Bi,${A0}[1]
+ vld1.32 {$Ni},[$bnptr,:64]! @ pull smashed m[8*i+$i]
+ vmlal.u32 @ACC[2],$Bi,${A1}[0]
+ it ne
+ addne $tinptr,$tinptr,#16 @ don't advance in last iteration
+ vmlal.u32 @ACC[3],$Bi,${A1}[1]
+ vmlal.u32 @ACC[4],$Bi,${A2}[0]
+ vmlal.u32 @ACC[5],$Bi,${A2}[1]
+ vmlal.u32 @ACC[6],$Bi,${A3}[0]
+ vmlal.u32 @ACC[7],$Bi,${A3}[1]
+___
+}
+$code.=<<___;
+ it eq
+ subeq $aptr,$aptr,$num,lsl#2 @ rewind
+ vmlal.u32 @ACC[0],$Ni,${N0}[0]
+ vld1.32 {$Bi},[sp,:64] @ pull smashed b[8*i+0]
+ vmlal.u32 @ACC[1],$Ni,${N0}[1]
+ vld1.32 {$A0-$A3},[$aptr]!
+ vmlal.u32 @ACC[2],$Ni,${N1}[0]
+ add $bnptr,sp,#8 @ rewind
+ vmlal.u32 @ACC[3],$Ni,${N1}[1]
+ vmlal.u32 @ACC[4],$Ni,${N2}[0]
+ vmlal.u32 @ACC[5],$Ni,${N2}[1]
+ vmlal.u32 @ACC[6],$Ni,${N3}[0]
+ vst1.64 {@ACC[0]},[$toutptr,:128]!
+ vmlal.u32 @ACC[7],$Ni,${N3}[1]
+
+ bne .LNEON_8n_inner
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ add $tinptr,sp,#128
+ vst1.64 {@ACC[0]-@ACC[1]},[$toutptr,:256]!
+ veor q2,q2,q2 @ $N0-$N1
+ vst1.64 {@ACC[2]-@ACC[3]},[$toutptr,:256]!
+ veor q3,q3,q3 @ $N2-$N3
+ vst1.64 {@ACC[4]-@ACC[5]},[$toutptr,:256]!
+ vst1.64 {@ACC[6]},[$toutptr,:128]
+
+ subs $outer,$outer,#8
+ vld1.64 {@ACC[0]-@ACC[1]},[$tinptr,:256]!
+ vld1.64 {@ACC[2]-@ACC[3]},[$tinptr,:256]!
+ vld1.64 {@ACC[4]-@ACC[5]},[$tinptr,:256]!
+ vld1.64 {@ACC[6]-@ACC[7]},[$tinptr,:256]!
+
+ itt ne
+ subne $nptr,$nptr,$num,lsl#2 @ rewind
+ bne .LNEON_8n_outer
+
+ add $toutptr,sp,#128
+ vst1.64 {q2-q3}, [sp,:256]! @ start wiping stack frame
+ vshr.u64 $temp,@ACC[0]#lo,#16
+ vst1.64 {q2-q3},[sp,:256]!
+ vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
+ vst1.64 {q2-q3}, [sp,:256]!
+ vshr.u64 $temp,@ACC[0]#hi,#16
+ vst1.64 {q2-q3}, [sp,:256]!
+ vzip.16 @ACC[0]#lo,@ACC[0]#hi
- mov $toutptr,sp
mov $inner,$num
+ b .LNEON_tail_entry
+.align 4
.LNEON_tail:
- vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
- vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
- vshr.u64 $temp,`&Dlo("$A0xB")`,#16
- vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
- vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
- vshr.u64 $temp,`&Dhi("$A0xB")`,#16
- vld1.64 {$A7xB}, [$tinptr, :128]!
- vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
-
-.LNEON_tail2:
- vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp
- vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A1xB")`,#16
- vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp
- vshr.u64 $temp,`&Dhi("$A1xB")`,#16
- vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")`
-
- vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp
- vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A2xB")`,#16
- vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp
- vshr.u64 $temp,`&Dhi("$A2xB")`,#16
- vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")`
-
- vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp
- vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A3xB")`,#16
- vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp
- vshr.u64 $temp,`&Dhi("$A3xB")`,#16
- vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")`
-
- vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp
- vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A4xB")`,#16
- vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp
- vshr.u64 $temp,`&Dhi("$A4xB")`,#16
- vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")`
-
- vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp
- vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A5xB")`,#16
- vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp
- vshr.u64 $temp,`&Dhi("$A5xB")`,#16
- vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")`
-
- vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp
- vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A6xB")`,#16
- vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp
- vld1.64 {$A0xB}, [$tinptr, :128]!
- vshr.u64 $temp,`&Dhi("$A6xB")`,#16
- vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")`
-
- vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp
- vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]!
- vshr.u64 $temp,`&Dlo("$A7xB")`,#16
- vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp
- vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
- vshr.u64 $temp,`&Dhi("$A7xB")`,#16
- vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")`
+ vadd.u64 @ACC[0]#lo,@ACC[0]#lo,$temp
+ vshr.u64 $temp,@ACC[0]#lo,#16
+ vld1.64 {@ACC[2]-@ACC[3]}, [$tinptr, :256]!
+ vadd.u64 @ACC[0]#hi,@ACC[0]#hi,$temp
+ vld1.64 {@ACC[4]-@ACC[5]}, [$tinptr, :256]!
+ vshr.u64 $temp,@ACC[0]#hi,#16
+ vld1.64 {@ACC[6]-@ACC[7]}, [$tinptr, :256]!
+ vzip.16 @ACC[0]#lo,@ACC[0]#hi
+
+.LNEON_tail_entry:
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+ vadd.u64 @ACC[1]#lo,@ACC[1]#lo,$temp
+ vst1.32 {@ACC[0]#lo[0]}, [$toutptr, :32]!
+ vshr.u64 $temp,@ACC[1]#lo,#16
+ vadd.u64 @ACC[1]#hi,@ACC[1]#hi,$temp
+ vshr.u64 $temp,@ACC[1]#hi,#16
+ vzip.16 @ACC[1]#lo,@ACC[1]#hi
+___
+ push(@ACC,shift(@ACC));
+}
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ vld1.64 {@ACC[0]-@ACC[1]}, [$tinptr, :256]!
subs $inner,$inner,#8
- vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]!
-
+ vst1.32 {@ACC[7]#lo[0]}, [$toutptr, :32]!
bne .LNEON_tail
vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit
@@ -622,8 +689,9 @@ bn_mul8x_mont_neon:
bne .LNEON_sub
ldr r10, [$aptr] @ load top-most bit
+ mov r11,sp
veor q0,q0,q0
- sub r11,$bptr,sp @ this is num*4
+ sub r11,$bptr,r11 @ this is num*4
veor q1,q1,q1
mov $aptr,sp
sub $rptr,$rptr,r11 @ rewind $rptr
@@ -633,27 +701,33 @@ bn_mul8x_mont_neon:
.LNEON_copy_n_zap:
ldmia $aptr!, {r4-r7}
ldmia $rptr, {r8-r11}
+ it cc
movcc r8, r4
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ it cc
movcc r11,r7
ldmia $aptr, {r4-r7}
stmia $rptr!, {r8-r11}
sub $aptr,$aptr,#16
ldmia $rptr, {r8-r11}
+ it cc
movcc r8, r4
vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
+ itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
+ it cc
movcc r11,r7
teq $aptr,$bptr @ preserves carry
stmia $rptr!, {r8-r11}
bne .LNEON_copy_n_zap
- sub sp,ip,#96
+ mov sp,ip
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r11}
ret @ bx lr
@@ -669,8 +743,14 @@ $code.=<<___;
#endif
___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
-$code =~ s/\bret\b/bx lr/gm;
-print $code;
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/ge;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/ge or
+ s/\bret\b/bx lr/g or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/g; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+
close STDOUT;
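
The rewritten output loop above replaces the old Dlo/Dhi helper subs with a textual convention: qN#lo and qN#hi in the code template name the low and high D halves of NEON register qN, and the final substitution expands them. For example:

    $_ = "vadd.u64 q6#lo,q6#lo,q5#hi";
    s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/ge;
    print "$_\n";    # vadd.u64 d12,d12,d11
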
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv8-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv8-mont.pl
new file mode 100755
index 0000000000..5d5af1b6be
--- /dev/null
+++ b/deps/openssl/openssl/crypto/bn/asm/armv8-mont.pl
@@ -0,0 +1,1510 @@
+#! /usr/bin/env perl
+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# March 2015
+#
+# "Teaser" Montgomery multiplication module for ARMv8. Needs more
+# work. While it does improve RSA sign performance by 20-30% (less for
+# longer keys) on most processors, for some reason RSA2048 is not
+# faster and RSA4096 goes 15-20% slower on Cortex-A57. Multiplication
+# instruction issue rate is limited on processor in question, meaning
+# that dedicated squaring procedure is a must. Well, actually all
+# contemporary AArch64 processors seem to have limited multiplication
+# issue rate, i.e. they can't issue multiplication every cycle, which
+# explains moderate improvement coefficients in comparison to
+# compiler-generated code. Recall that compiler is instructed to use
+# umulh and therefore uses same amount of multiplication instructions
+# to do the job. Assembly's edge is to minimize number of "collateral"
+# instructions and of course instruction scheduling.
+#
+# April 2015
+#
+# Squaring procedure that handles lengths divisible by 8 improves
+# RSA/DSA performance by 25-40-60% depending on processor and key
+# length. Overall improvement coefficients are always positive in
+# comparison to compiler-generated code. On Cortex-A57 improvement
+# is still modest on longest key lengths, while others exhibit e.g.
+# 50-70% improvement for RSA4096 sign. RSA2048 sign is ~25% faster
+# on Cortex-A57 and ~60-100% faster on others.
+
+$flavour = shift;
+$output = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+($lo0,$hi0,$aj,$m0,$alo,$ahi,
+ $lo1,$hi1,$nj,$m1,$nlo,$nhi,
+ $ovf, $i,$j,$tp,$tj) = map("x$_",6..17,19..24);
+
+# int bn_mul_mont(
+$rp="x0"; # BN_ULONG *rp,
+$ap="x1"; # const BN_ULONG *ap,
+$bp="x2"; # const BN_ULONG *bp,
+$np="x3"; # const BN_ULONG *np,
+$n0="x4"; # const BN_ULONG *n0,
+$num="x5"; # int num);
+
+$code.=<<___;
+.text
+
+.globl bn_mul_mont
+.type bn_mul_mont,%function
+.align 5
+bn_mul_mont:
+ tst $num,#7
+ b.eq __bn_sqr8x_mont
+ tst $num,#3
+ b.eq __bn_mul4x_mont
+.Lmul_mont:
+ stp x29,x30,[sp,#-64]!
+ add x29,sp,#0
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+
+ ldr $m0,[$bp],#8 // bp[0]
+ sub $tp,sp,$num,lsl#3
+ ldp $hi0,$aj,[$ap],#16 // ap[0..1]
+ lsl $num,$num,#3
+ ldr $n0,[$n0] // *n0
+ and $tp,$tp,#-16 // ABI says so
+ ldp $hi1,$nj,[$np],#16 // np[0..1]
+
+ mul $lo0,$hi0,$m0 // ap[0]*bp[0]
+ sub $j,$num,#16 // j=num-2
+ umulh $hi0,$hi0,$m0
+ mul $alo,$aj,$m0 // ap[1]*bp[0]
+ umulh $ahi,$aj,$m0
+
+ mul $m1,$lo0,$n0 // "tp[0]"*n0
+ mov sp,$tp // alloca
+
+ // (*) mul $lo1,$hi1,$m1 // np[0]*m1
+ umulh $hi1,$hi1,$m1
+ mul $nlo,$nj,$m1 // np[1]*m1
+ // (*) adds $lo1,$lo1,$lo0 // discarded
+ // (*) As for removal of first multiplication and addition
+ // instructions. The outcome of first addition is
+ // guaranteed to be zero, which leaves two computationally
+ // significant outcomes: it either carries or not. Then
+ // question is when does it carry? Is there alternative
+ // way to deduce it? If you follow operations, you can
+ // observe that condition for carry is quite simple:
+ // $lo0 being non-zero. So that carry can be calculated
+ // by adding -1 to $lo0. That's what next instruction does.
+ subs xzr,$lo0,#1 // (*)
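+	// (Concretely: $lo1 is lo(np[0]*m1) == 2^64-$lo0 whenever $lo0
+	// is non-zero, so the discarded sum is exactly 2^64 and carries
+	// out, while $lo0 == 0 gives $lo1 == 0 and no carry. The subs
+	// above sets the carry flag under the same condition, $lo0 >= 1.)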
+ umulh $nhi,$nj,$m1
+ adc $hi1,$hi1,xzr
+ cbz $j,.L1st_skip
+
+.L1st:
+ ldr $aj,[$ap],#8
+ adds $lo0,$alo,$hi0
+ sub $j,$j,#8 // j--
+ adc $hi0,$ahi,xzr
+
+ ldr $nj,[$np],#8
+ adds $lo1,$nlo,$hi1
+ mul $alo,$aj,$m0 // ap[j]*bp[0]
+ adc $hi1,$nhi,xzr
+ umulh $ahi,$aj,$m0
+
+ adds $lo1,$lo1,$lo0
+ mul $nlo,$nj,$m1 // np[j]*m1
+ adc $hi1,$hi1,xzr
+ umulh $nhi,$nj,$m1
+ str $lo1,[$tp],#8 // tp[j-1]
+ cbnz $j,.L1st
+
+.L1st_skip:
+ adds $lo0,$alo,$hi0
+ sub $ap,$ap,$num // rewind $ap
+ adc $hi0,$ahi,xzr
+
+ adds $lo1,$nlo,$hi1
+ sub $np,$np,$num // rewind $np
+ adc $hi1,$nhi,xzr
+
+ adds $lo1,$lo1,$lo0
+ sub $i,$num,#8 // i=num-1
+ adcs $hi1,$hi1,$hi0
+
+ adc $ovf,xzr,xzr // upmost overflow bit
+ stp $lo1,$hi1,[$tp]
+
+.Louter:
+ ldr $m0,[$bp],#8 // bp[i]
+ ldp $hi0,$aj,[$ap],#16
+ ldr $tj,[sp] // tp[0]
+ add $tp,sp,#8
+
+ mul $lo0,$hi0,$m0 // ap[0]*bp[i]
+ sub $j,$num,#16 // j=num-2
+ umulh $hi0,$hi0,$m0
+ ldp $hi1,$nj,[$np],#16
+ mul $alo,$aj,$m0 // ap[1]*bp[i]
+ adds $lo0,$lo0,$tj
+ umulh $ahi,$aj,$m0
+ adc $hi0,$hi0,xzr
+
+ mul $m1,$lo0,$n0
+ sub $i,$i,#8 // i--
+
+ // (*) mul $lo1,$hi1,$m1 // np[0]*m1
+ umulh $hi1,$hi1,$m1
+ mul $nlo,$nj,$m1 // np[1]*m1
+ // (*) adds $lo1,$lo1,$lo0
+ subs xzr,$lo0,#1 // (*)
+ umulh $nhi,$nj,$m1
+ cbz $j,.Linner_skip
+
+.Linner:
+ ldr $aj,[$ap],#8
+ adc $hi1,$hi1,xzr
+ ldr $tj,[$tp],#8 // tp[j]
+ adds $lo0,$alo,$hi0
+ sub $j,$j,#8 // j--
+ adc $hi0,$ahi,xzr
+
+ adds $lo1,$nlo,$hi1
+ ldr $nj,[$np],#8
+ adc $hi1,$nhi,xzr
+
+ mul $alo,$aj,$m0 // ap[j]*bp[i]
+ adds $lo0,$lo0,$tj
+ umulh $ahi,$aj,$m0
+ adc $hi0,$hi0,xzr
+
+ mul $nlo,$nj,$m1 // np[j]*m1
+ adds $lo1,$lo1,$lo0
+ umulh $nhi,$nj,$m1
+ str $lo1,[$tp,#-16] // tp[j-1]
+ cbnz $j,.Linner
+
+.Linner_skip:
+ ldr $tj,[$tp],#8 // tp[j]
+ adc $hi1,$hi1,xzr
+ adds $lo0,$alo,$hi0
+ sub $ap,$ap,$num // rewind $ap
+ adc $hi0,$ahi,xzr
+
+ adds $lo1,$nlo,$hi1
+ sub $np,$np,$num // rewind $np
+ adcs $hi1,$nhi,$ovf
+ adc $ovf,xzr,xzr
+
+ adds $lo0,$lo0,$tj
+ adc $hi0,$hi0,xzr
+
+ adds $lo1,$lo1,$lo0
+ adcs $hi1,$hi1,$hi0
+ adc $ovf,$ovf,xzr // upmost overflow bit
+ stp $lo1,$hi1,[$tp,#-16]
+
+ cbnz $i,.Louter
+
+ // Final step. We see if result is larger than modulus, and
+ // if it is, subtract the modulus. But comparison implies
+ // subtraction. So we subtract modulus, see if it borrowed,
+ // and conditionally copy original value.
+ ldr $tj,[sp] // tp[0]
+ add $tp,sp,#8
+ ldr $nj,[$np],#8 // np[0]
+ subs $j,$num,#8 // j=num-1 and clear borrow
+ mov $ap,$rp
+.Lsub:
+ sbcs $aj,$tj,$nj // tp[j]-np[j]
+ ldr $tj,[$tp],#8
+ sub $j,$j,#8 // j--
+ ldr $nj,[$np],#8
+ str $aj,[$ap],#8 // rp[j]=tp[j]-np[j]
+ cbnz $j,.Lsub
+
+ sbcs $aj,$tj,$nj
+ sbcs $ovf,$ovf,xzr // did it borrow?
+ str $aj,[$ap],#8 // rp[num-1]
+
+ ldr $tj,[sp] // tp[0]
+ add $tp,sp,#8
+ ldr $aj,[$rp],#8 // rp[0]
+ sub $num,$num,#8 // num--
+ nop
+.Lcond_copy:
+ sub $num,$num,#8 // num--
+ csel $nj,$tj,$aj,lo // did it borrow?
+ ldr $tj,[$tp],#8
+ ldr $aj,[$rp],#8
+ str xzr,[$tp,#-16] // wipe tp
+ str $nj,[$rp,#-16]
+ cbnz $num,.Lcond_copy
+
+ csel $nj,$tj,$aj,lo
+ str xzr,[$tp,#-8] // wipe tp
+ str $nj,[$rp,#-8]
+
+ ldp x19,x20,[x29,#16]
+ mov sp,x29
+ ldp x21,x22,[x29,#32]
+ mov x0,#1
+ ldp x23,x24,[x29,#48]
+ ldr x29,[sp],#64
+ ret
+.size bn_mul_mont,.-bn_mul_mont
+___
+{
+########################################################################
+# Following is ARMv8 adaptation of sqrx8x_mont from x86_64-mont5 module.
+
+my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("x$_",(6..13));
+my ($t0,$t1,$t2,$t3)=map("x$_",(14..17));
+my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("x$_",(19..26));
+my ($cnt,$carry,$topmost)=("x27","x28","x30");
+my ($tp,$ap_end,$na0)=($bp,$np,$carry);
+
+$code.=<<___;
+.type __bn_sqr8x_mont,%function
+.align 5
+__bn_sqr8x_mont:
+ cmp $ap,$bp
+ b.ne __bn_mul4x_mont
+.Lsqr8x_mont:
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+ stp $rp,$np,[sp,#96] // offload rp and np
+
+ ldp $a0,$a1,[$ap,#8*0]
+ ldp $a2,$a3,[$ap,#8*2]
+ ldp $a4,$a5,[$ap,#8*4]
+ ldp $a6,$a7,[$ap,#8*6]
+
+ sub $tp,sp,$num,lsl#4
+ lsl $num,$num,#3
+ ldr $n0,[$n0] // *n0
+ mov sp,$tp // alloca
+ sub $cnt,$num,#8*8
+ b .Lsqr8x_zero_start
+
+.Lsqr8x_zero:
+ sub $cnt,$cnt,#8*8
+ stp xzr,xzr,[$tp,#8*0]
+ stp xzr,xzr,[$tp,#8*2]
+ stp xzr,xzr,[$tp,#8*4]
+ stp xzr,xzr,[$tp,#8*6]
+.Lsqr8x_zero_start:
+ stp xzr,xzr,[$tp,#8*8]
+ stp xzr,xzr,[$tp,#8*10]
+ stp xzr,xzr,[$tp,#8*12]
+ stp xzr,xzr,[$tp,#8*14]
+ add $tp,$tp,#8*16
+ cbnz $cnt,.Lsqr8x_zero
+
+ add $ap_end,$ap,$num
+ add $ap,$ap,#8*8
+ mov $acc0,xzr
+ mov $acc1,xzr
+ mov $acc2,xzr
+ mov $acc3,xzr
+ mov $acc4,xzr
+ mov $acc5,xzr
+ mov $acc6,xzr
+ mov $acc7,xzr
+ mov $tp,sp
+ str $n0,[x29,#112] // offload n0
+
+ // Multiply everything but a[i]*a[i]
+.align 4
+.Lsqr8x_outer_loop:
+ // a[1]a[0] (i)
+ // a[2]a[0]
+ // a[3]a[0]
+ // a[4]a[0]
+ // a[5]a[0]
+ // a[6]a[0]
+ // a[7]a[0]
+ // a[2]a[1] (ii)
+ // a[3]a[1]
+ // a[4]a[1]
+ // a[5]a[1]
+ // a[6]a[1]
+ // a[7]a[1]
+ // a[3]a[2] (iii)
+ // a[4]a[2]
+ // a[5]a[2]
+ // a[6]a[2]
+ // a[7]a[2]
+ // a[4]a[3] (iv)
+ // a[5]a[3]
+ // a[6]a[3]
+ // a[7]a[3]
+ // a[5]a[4] (v)
+ // a[6]a[4]
+ // a[7]a[4]
+ // a[6]a[5] (vi)
+ // a[7]a[5]
+ // a[7]a[6] (vii)
+
+ mul $t0,$a1,$a0 // lo(a[1..7]*a[0]) (i)
+ mul $t1,$a2,$a0
+ mul $t2,$a3,$a0
+ mul $t3,$a4,$a0
+ adds $acc1,$acc1,$t0 // t[1]+lo(a[1]*a[0])
+ mul $t0,$a5,$a0
+ adcs $acc2,$acc2,$t1
+ mul $t1,$a6,$a0
+ adcs $acc3,$acc3,$t2
+ mul $t2,$a7,$a0
+ adcs $acc4,$acc4,$t3
+ umulh $t3,$a1,$a0 // hi(a[1..7]*a[0])
+ adcs $acc5,$acc5,$t0
+ umulh $t0,$a2,$a0
+ adcs $acc6,$acc6,$t1
+ umulh $t1,$a3,$a0
+ adcs $acc7,$acc7,$t2
+ umulh $t2,$a4,$a0
+ stp $acc0,$acc1,[$tp],#8*2 // t[0..1]
+ adc $acc0,xzr,xzr // t[8]
+ adds $acc2,$acc2,$t3 // t[2]+lo(a[1]*a[0])
+ umulh $t3,$a5,$a0
+ adcs $acc3,$acc3,$t0
+ umulh $t0,$a6,$a0
+ adcs $acc4,$acc4,$t1
+ umulh $t1,$a7,$a0
+ adcs $acc5,$acc5,$t2
+ mul $t2,$a2,$a1 // lo(a[2..7]*a[1]) (ii)
+ adcs $acc6,$acc6,$t3
+ mul $t3,$a3,$a1
+ adcs $acc7,$acc7,$t0
+ mul $t0,$a4,$a1
+ adc $acc0,$acc0,$t1
+
+ mul $t1,$a5,$a1
+ adds $acc3,$acc3,$t2
+ mul $t2,$a6,$a1
+ adcs $acc4,$acc4,$t3
+ mul $t3,$a7,$a1
+ adcs $acc5,$acc5,$t0
+ umulh $t0,$a2,$a1 // hi(a[2..7]*a[1])
+ adcs $acc6,$acc6,$t1
+ umulh $t1,$a3,$a1
+ adcs $acc7,$acc7,$t2
+ umulh $t2,$a4,$a1
+ adcs $acc0,$acc0,$t3
+ umulh $t3,$a5,$a1
+ stp $acc2,$acc3,[$tp],#8*2 // t[2..3]
+ adc $acc1,xzr,xzr // t[9]
+ adds $acc4,$acc4,$t0
+ umulh $t0,$a6,$a1
+ adcs $acc5,$acc5,$t1
+ umulh $t1,$a7,$a1
+ adcs $acc6,$acc6,$t2
+ mul $t2,$a3,$a2 // lo(a[3..7]*a[2]) (iii)
+ adcs $acc7,$acc7,$t3
+ mul $t3,$a4,$a2
+ adcs $acc0,$acc0,$t0
+ mul $t0,$a5,$a2
+ adc $acc1,$acc1,$t1
+
+ mul $t1,$a6,$a2
+ adds $acc5,$acc5,$t2
+ mul $t2,$a7,$a2
+ adcs $acc6,$acc6,$t3
+ umulh $t3,$a3,$a2 // hi(a[3..7]*a[2])
+ adcs $acc7,$acc7,$t0
+ umulh $t0,$a4,$a2
+ adcs $acc0,$acc0,$t1
+ umulh $t1,$a5,$a2
+ adcs $acc1,$acc1,$t2
+ umulh $t2,$a6,$a2
+ stp $acc4,$acc5,[$tp],#8*2 // t[4..5]
+ adc $acc2,xzr,xzr // t[10]
+ adds $acc6,$acc6,$t3
+ umulh $t3,$a7,$a2
+ adcs $acc7,$acc7,$t0
+ mul $t0,$a4,$a3 // lo(a[4..7]*a[3]) (iv)
+ adcs $acc0,$acc0,$t1
+ mul $t1,$a5,$a3
+ adcs $acc1,$acc1,$t2
+ mul $t2,$a6,$a3
+ adc $acc2,$acc2,$t3
+
+ mul $t3,$a7,$a3
+ adds $acc7,$acc7,$t0
+ umulh $t0,$a4,$a3 // hi(a[4..7]*a[3])
+ adcs $acc0,$acc0,$t1
+ umulh $t1,$a5,$a3
+ adcs $acc1,$acc1,$t2
+ umulh $t2,$a6,$a3
+ adcs $acc2,$acc2,$t3
+ umulh $t3,$a7,$a3
+ stp $acc6,$acc7,[$tp],#8*2 // t[6..7]
+ adc $acc3,xzr,xzr // t[11]
+ adds $acc0,$acc0,$t0
+ mul $t0,$a5,$a4 // lo(a[5..7]*a[4]) (v)
+ adcs $acc1,$acc1,$t1
+ mul $t1,$a6,$a4
+ adcs $acc2,$acc2,$t2
+ mul $t2,$a7,$a4
+ adc $acc3,$acc3,$t3
+
+ umulh $t3,$a5,$a4 // hi(a[5..7]*a[4])
+ adds $acc1,$acc1,$t0
+ umulh $t0,$a6,$a4
+ adcs $acc2,$acc2,$t1
+ umulh $t1,$a7,$a4
+ adcs $acc3,$acc3,$t2
+ mul $t2,$a6,$a5 // lo(a[6..7]*a[5]) (vi)
+ adc $acc4,xzr,xzr // t[12]
+ adds $acc2,$acc2,$t3
+ mul $t3,$a7,$a5
+ adcs $acc3,$acc3,$t0
+ umulh $t0,$a6,$a5 // hi(a[6..7]*a[5])
+ adc $acc4,$acc4,$t1
+
+ umulh $t1,$a7,$a5
+ adds $acc3,$acc3,$t2
+ mul $t2,$a7,$a6 // lo(a[7]*a[6]) (vii)
+ adcs $acc4,$acc4,$t3
+ umulh $t3,$a7,$a6 // hi(a[7]*a[6])
+ adc $acc5,xzr,xzr // t[13]
+ adds $acc4,$acc4,$t0
+ sub $cnt,$ap_end,$ap // done yet?
+ adc $acc5,$acc5,$t1
+
+ adds $acc5,$acc5,$t2
+ sub $t0,$ap_end,$num // rewinded ap
+ adc $acc6,xzr,xzr // t[14]
+ add $acc6,$acc6,$t3
+
+ cbz $cnt,.Lsqr8x_outer_break
+
+ mov $n0,$a0
+ ldp $a0,$a1,[$tp,#8*0]
+ ldp $a2,$a3,[$tp,#8*2]
+ ldp $a4,$a5,[$tp,#8*4]
+ ldp $a6,$a7,[$tp,#8*6]
+ adds $acc0,$acc0,$a0
+ adcs $acc1,$acc1,$a1
+ ldp $a0,$a1,[$ap,#8*0]
+ adcs $acc2,$acc2,$a2
+ adcs $acc3,$acc3,$a3
+ ldp $a2,$a3,[$ap,#8*2]
+ adcs $acc4,$acc4,$a4
+ adcs $acc5,$acc5,$a5
+ ldp $a4,$a5,[$ap,#8*4]
+ adcs $acc6,$acc6,$a6
+ mov $rp,$ap
+ adcs $acc7,xzr,$a7
+ ldp $a6,$a7,[$ap,#8*6]
+ add $ap,$ap,#8*8
+ //adc $carry,xzr,xzr // moved below
+ mov $cnt,#-8*8
+
+ // a[8]a[0]
+ // a[9]a[0]
+ // a[a]a[0]
+ // a[b]a[0]
+ // a[c]a[0]
+ // a[d]a[0]
+ // a[e]a[0]
+ // a[f]a[0]
+ // a[8]a[1]
+ // a[f]a[1]........................
+ // a[8]a[2]
+ // a[f]a[2]........................
+ // a[8]a[3]
+ // a[f]a[3]........................
+ // a[8]a[4]
+ // a[f]a[4]........................
+ // a[8]a[5]
+ // a[f]a[5]........................
+ // a[8]a[6]
+ // a[f]a[6]........................
+ // a[8]a[7]
+ // a[f]a[7]........................
+.Lsqr8x_mul:
+ mul $t0,$a0,$n0
+ adc $carry,xzr,xzr // carry bit, modulo-scheduled
+ mul $t1,$a1,$n0
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$n0
+ mul $t3,$a3,$n0
+ adds $acc0,$acc0,$t0
+ mul $t0,$a4,$n0
+ adcs $acc1,$acc1,$t1
+ mul $t1,$a5,$n0
+ adcs $acc2,$acc2,$t2
+ mul $t2,$a6,$n0
+ adcs $acc3,$acc3,$t3
+ mul $t3,$a7,$n0
+ adcs $acc4,$acc4,$t0
+ umulh $t0,$a0,$n0
+ adcs $acc5,$acc5,$t1
+ umulh $t1,$a1,$n0
+ adcs $acc6,$acc6,$t2
+ umulh $t2,$a2,$n0
+ adcs $acc7,$acc7,$t3
+ umulh $t3,$a3,$n0
+ adc $carry,$carry,xzr
+ str $acc0,[$tp],#8
+ adds $acc0,$acc1,$t0
+ umulh $t0,$a4,$n0
+ adcs $acc1,$acc2,$t1
+ umulh $t1,$a5,$n0
+ adcs $acc2,$acc3,$t2
+ umulh $t2,$a6,$n0
+ adcs $acc3,$acc4,$t3
+ umulh $t3,$a7,$n0
+ ldr $n0,[$rp,$cnt]
+ adcs $acc4,$acc5,$t0
+ adcs $acc5,$acc6,$t1
+ adcs $acc6,$acc7,$t2
+ adcs $acc7,$carry,$t3
+ //adc $carry,xzr,xzr // moved above
+ cbnz $cnt,.Lsqr8x_mul
+ // note that carry flag is guaranteed
+ // to be zero at this point
+ cmp $ap,$ap_end // done yet?
+ b.eq .Lsqr8x_break
+
+ ldp $a0,$a1,[$tp,#8*0]
+ ldp $a2,$a3,[$tp,#8*2]
+ ldp $a4,$a5,[$tp,#8*4]
+ ldp $a6,$a7,[$tp,#8*6]
+ adds $acc0,$acc0,$a0
+ ldr $n0,[$rp,#-8*8]
+ adcs $acc1,$acc1,$a1
+ ldp $a0,$a1,[$ap,#8*0]
+ adcs $acc2,$acc2,$a2
+ adcs $acc3,$acc3,$a3
+ ldp $a2,$a3,[$ap,#8*2]
+ adcs $acc4,$acc4,$a4
+ adcs $acc5,$acc5,$a5
+ ldp $a4,$a5,[$ap,#8*4]
+ adcs $acc6,$acc6,$a6
+ mov $cnt,#-8*8
+ adcs $acc7,$acc7,$a7
+ ldp $a6,$a7,[$ap,#8*6]
+ add $ap,$ap,#8*8
+ //adc $carry,xzr,xzr // moved above
+ b .Lsqr8x_mul
+
+.align 4
+.Lsqr8x_break:
+ ldp $a0,$a1,[$rp,#8*0]
+ add $ap,$rp,#8*8
+ ldp $a2,$a3,[$rp,#8*2]
+ sub $t0,$ap_end,$ap // is it last iteration?
+ ldp $a4,$a5,[$rp,#8*4]
+ sub $t1,$tp,$t0
+ ldp $a6,$a7,[$rp,#8*6]
+ cbz $t0,.Lsqr8x_outer_loop
+
+ stp $acc0,$acc1,[$tp,#8*0]
+ ldp $acc0,$acc1,[$t1,#8*0]
+ stp $acc2,$acc3,[$tp,#8*2]
+ ldp $acc2,$acc3,[$t1,#8*2]
+ stp $acc4,$acc5,[$tp,#8*4]
+ ldp $acc4,$acc5,[$t1,#8*4]
+ stp $acc6,$acc7,[$tp,#8*6]
+ mov $tp,$t1
+ ldp $acc6,$acc7,[$t1,#8*6]
+ b .Lsqr8x_outer_loop
+
+.align 4
+.Lsqr8x_outer_break:
+ // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
+ ldp $a1,$a3,[$t0,#8*0] // recall that $t0 is &a[0]
+ ldp $t1,$t2,[sp,#8*1]
+ ldp $a5,$a7,[$t0,#8*2]
+ add $ap,$t0,#8*4
+ ldp $t3,$t0,[sp,#8*3]
+
+ stp $acc0,$acc1,[$tp,#8*0]
+ mul $acc0,$a1,$a1
+ stp $acc2,$acc3,[$tp,#8*2]
+ umulh $a1,$a1,$a1
+ stp $acc4,$acc5,[$tp,#8*4]
+ mul $a2,$a3,$a3
+ stp $acc6,$acc7,[$tp,#8*6]
+ mov $tp,sp
+ umulh $a3,$a3,$a3
+ adds $acc1,$a1,$t1,lsl#1
+ extr $t1,$t2,$t1,#63
+ sub $cnt,$num,#8*4
+
+.Lsqr4x_shift_n_add:
+ adcs $acc2,$a2,$t1
+ extr $t2,$t3,$t2,#63
+ sub $cnt,$cnt,#8*4
+ adcs $acc3,$a3,$t2
+ ldp $t1,$t2,[$tp,#8*5]
+ mul $a4,$a5,$a5
+ ldp $a1,$a3,[$ap],#8*2
+ umulh $a5,$a5,$a5
+ mul $a6,$a7,$a7
+ umulh $a7,$a7,$a7
+ extr $t3,$t0,$t3,#63
+ stp $acc0,$acc1,[$tp,#8*0]
+ adcs $acc4,$a4,$t3
+ extr $t0,$t1,$t0,#63
+ stp $acc2,$acc3,[$tp,#8*2]
+ adcs $acc5,$a5,$t0
+ ldp $t3,$t0,[$tp,#8*7]
+ extr $t1,$t2,$t1,#63
+ adcs $acc6,$a6,$t1
+ extr $t2,$t3,$t2,#63
+ adcs $acc7,$a7,$t2
+ ldp $t1,$t2,[$tp,#8*9]
+ mul $a0,$a1,$a1
+ ldp $a5,$a7,[$ap],#8*2
+ umulh $a1,$a1,$a1
+ mul $a2,$a3,$a3
+ umulh $a3,$a3,$a3
+ stp $acc4,$acc5,[$tp,#8*4]
+ extr $t3,$t0,$t3,#63
+ stp $acc6,$acc7,[$tp,#8*6]
+ add $tp,$tp,#8*8
+ adcs $acc0,$a0,$t3
+ extr $t0,$t1,$t0,#63
+ adcs $acc1,$a1,$t0
+ ldp $t3,$t0,[$tp,#8*3]
+ extr $t1,$t2,$t1,#63
+ cbnz $cnt,.Lsqr4x_shift_n_add
+___
+my ($np,$np_end)=($ap,$ap_end);
+$code.=<<___;
+ ldp $np,$n0,[x29,#104] // pull np and n0
+
+ adcs $acc2,$a2,$t1
+ extr $t2,$t3,$t2,#63
+ adcs $acc3,$a3,$t2
+ ldp $t1,$t2,[$tp,#8*5]
+ mul $a4,$a5,$a5
+ umulh $a5,$a5,$a5
+ stp $acc0,$acc1,[$tp,#8*0]
+ mul $a6,$a7,$a7
+ umulh $a7,$a7,$a7
+ stp $acc2,$acc3,[$tp,#8*2]
+ extr $t3,$t0,$t3,#63
+ adcs $acc4,$a4,$t3
+ extr $t0,$t1,$t0,#63
+ ldp $acc0,$acc1,[sp,#8*0]
+ adcs $acc5,$a5,$t0
+ extr $t1,$t2,$t1,#63
+ ldp $a0,$a1,[$np,#8*0]
+ adcs $acc6,$a6,$t1
+ extr $t2,xzr,$t2,#63
+ ldp $a2,$a3,[$np,#8*2]
+ adc $acc7,$a7,$t2
+ ldp $a4,$a5,[$np,#8*4]
+
+ // Reduce by 512 bits per iteration
+ mul $na0,$n0,$acc0 // t[0]*n0
+ ldp $a6,$a7,[$np,#8*6]
+ add $np_end,$np,$num
+ ldp $acc2,$acc3,[sp,#8*2]
+ stp $acc4,$acc5,[$tp,#8*4]
+ ldp $acc4,$acc5,[sp,#8*4]
+ stp $acc6,$acc7,[$tp,#8*6]
+ ldp $acc6,$acc7,[sp,#8*6]
+ add $np,$np,#8*8
+ mov $topmost,xzr // initial top-most carry
+ mov $tp,sp
+ mov $cnt,#8
+
+.Lsqr8x_reduction:
+ // (*) mul $t0,$a0,$na0 // lo(n[0-7])*lo(t[0]*n0)
+ mul $t1,$a1,$na0
+ sub $cnt,$cnt,#1
+ mul $t2,$a2,$na0
+ str $na0,[$tp],#8 // put aside t[0]*n0 for tail processing
+ mul $t3,$a3,$na0
+ // (*) adds xzr,$acc0,$t0
+ subs xzr,$acc0,#1 // (*)
+ mul $t0,$a4,$na0
+ adcs $acc0,$acc1,$t1
+ mul $t1,$a5,$na0
+ adcs $acc1,$acc2,$t2
+ mul $t2,$a6,$na0
+ adcs $acc2,$acc3,$t3
+ mul $t3,$a7,$na0
+ adcs $acc3,$acc4,$t0
+ umulh $t0,$a0,$na0 // hi(n[0-7])*lo(t[0]*n0)
+ adcs $acc4,$acc5,$t1
+ umulh $t1,$a1,$na0
+ adcs $acc5,$acc6,$t2
+ umulh $t2,$a2,$na0
+ adcs $acc6,$acc7,$t3
+ umulh $t3,$a3,$na0
+ adc $acc7,xzr,xzr
+ adds $acc0,$acc0,$t0
+ umulh $t0,$a4,$na0
+ adcs $acc1,$acc1,$t1
+ umulh $t1,$a5,$na0
+ adcs $acc2,$acc2,$t2
+ umulh $t2,$a6,$na0
+ adcs $acc3,$acc3,$t3
+ umulh $t3,$a7,$na0
+ mul $na0,$n0,$acc0 // next t[0]*n0
+ adcs $acc4,$acc4,$t0
+ adcs $acc5,$acc5,$t1
+ adcs $acc6,$acc6,$t2
+ adc $acc7,$acc7,$t3
+ cbnz $cnt,.Lsqr8x_reduction
+
+ ldp $t0,$t1,[$tp,#8*0]
+ ldp $t2,$t3,[$tp,#8*2]
+ mov $rp,$tp
+ sub $cnt,$np_end,$np // done yet?
+ adds $acc0,$acc0,$t0
+ adcs $acc1,$acc1,$t1
+ ldp $t0,$t1,[$tp,#8*4]
+ adcs $acc2,$acc2,$t2
+ adcs $acc3,$acc3,$t3
+ ldp $t2,$t3,[$tp,#8*6]
+ adcs $acc4,$acc4,$t0
+ adcs $acc5,$acc5,$t1
+ adcs $acc6,$acc6,$t2
+ adcs $acc7,$acc7,$t3
+ //adc $carry,xzr,xzr // moved below
+ cbz $cnt,.Lsqr8x8_post_condition
+
+ ldr $n0,[$tp,#-8*8]
+ ldp $a0,$a1,[$np,#8*0]
+ ldp $a2,$a3,[$np,#8*2]
+ ldp $a4,$a5,[$np,#8*4]
+ mov $cnt,#-8*8
+ ldp $a6,$a7,[$np,#8*6]
+ add $np,$np,#8*8
+
+.Lsqr8x_tail:
+ mul $t0,$a0,$n0
+ adc $carry,xzr,xzr // carry bit, modulo-scheduled
+ mul $t1,$a1,$n0
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$n0
+ mul $t3,$a3,$n0
+ adds $acc0,$acc0,$t0
+ mul $t0,$a4,$n0
+ adcs $acc1,$acc1,$t1
+ mul $t1,$a5,$n0
+ adcs $acc2,$acc2,$t2
+ mul $t2,$a6,$n0
+ adcs $acc3,$acc3,$t3
+ mul $t3,$a7,$n0
+ adcs $acc4,$acc4,$t0
+ umulh $t0,$a0,$n0
+ adcs $acc5,$acc5,$t1
+ umulh $t1,$a1,$n0
+ adcs $acc6,$acc6,$t2
+ umulh $t2,$a2,$n0
+ adcs $acc7,$acc7,$t3
+ umulh $t3,$a3,$n0
+ adc $carry,$carry,xzr
+ str $acc0,[$tp],#8
+ adds $acc0,$acc1,$t0
+ umulh $t0,$a4,$n0
+ adcs $acc1,$acc2,$t1
+ umulh $t1,$a5,$n0
+ adcs $acc2,$acc3,$t2
+ umulh $t2,$a6,$n0
+ adcs $acc3,$acc4,$t3
+ umulh $t3,$a7,$n0
+ ldr $n0,[$rp,$cnt]
+ adcs $acc4,$acc5,$t0
+ adcs $acc5,$acc6,$t1
+ adcs $acc6,$acc7,$t2
+ adcs $acc7,$carry,$t3
+ //adc $carry,xzr,xzr // moved above
+ cbnz $cnt,.Lsqr8x_tail
+ // note that carry flag is guaranteed
+ // to be zero at this point
+ ldp $a0,$a1,[$tp,#8*0]
+ sub $cnt,$np_end,$np // done yet?
+ sub $t2,$np_end,$num // rewinded np
+ ldp $a2,$a3,[$tp,#8*2]
+ ldp $a4,$a5,[$tp,#8*4]
+ ldp $a6,$a7,[$tp,#8*6]
+ cbz $cnt,.Lsqr8x_tail_break
+
+ ldr $n0,[$rp,#-8*8]
+ adds $acc0,$acc0,$a0
+ adcs $acc1,$acc1,$a1
+ ldp $a0,$a1,[$np,#8*0]
+ adcs $acc2,$acc2,$a2
+ adcs $acc3,$acc3,$a3
+ ldp $a2,$a3,[$np,#8*2]
+ adcs $acc4,$acc4,$a4
+ adcs $acc5,$acc5,$a5
+ ldp $a4,$a5,[$np,#8*4]
+ adcs $acc6,$acc6,$a6
+ mov $cnt,#-8*8
+ adcs $acc7,$acc7,$a7
+ ldp $a6,$a7,[$np,#8*6]
+ add $np,$np,#8*8
+ //adc $carry,xzr,xzr // moved above
+ b .Lsqr8x_tail
+
+.align 4
+.Lsqr8x_tail_break:
+ ldr $n0,[x29,#112] // pull n0
+ add $cnt,$tp,#8*8 // end of current t[num] window
+
+ subs xzr,$topmost,#1 // "move" top-most carry to carry bit
+ adcs $t0,$acc0,$a0
+ adcs $t1,$acc1,$a1
+ ldp $acc0,$acc1,[$rp,#8*0]
+ adcs $acc2,$acc2,$a2
+ ldp $a0,$a1,[$t2,#8*0] // recall that $t2 is &n[0]
+ adcs $acc3,$acc3,$a3
+ ldp $a2,$a3,[$t2,#8*2]
+ adcs $acc4,$acc4,$a4
+ adcs $acc5,$acc5,$a5
+ ldp $a4,$a5,[$t2,#8*4]
+ adcs $acc6,$acc6,$a6
+ adcs $acc7,$acc7,$a7
+ ldp $a6,$a7,[$t2,#8*6]
+ add $np,$t2,#8*8
+ adc $topmost,xzr,xzr // top-most carry
+ mul $na0,$n0,$acc0
+ stp $t0,$t1,[$tp,#8*0]
+ stp $acc2,$acc3,[$tp,#8*2]
+ ldp $acc2,$acc3,[$rp,#8*2]
+ stp $acc4,$acc5,[$tp,#8*4]
+ ldp $acc4,$acc5,[$rp,#8*4]
+ cmp $cnt,x29 // did we hit the bottom?
+ stp $acc6,$acc7,[$tp,#8*6]
+ mov $tp,$rp // slide the window
+ ldp $acc6,$acc7,[$rp,#8*6]
+ mov $cnt,#8
+ b.ne .Lsqr8x_reduction
+
+ // Final step. We see if result is larger than modulus, and
+ // if it is, subtract the modulus. But comparison implies
+ // subtraction. So we subtract modulus, see if it borrowed,
+ // and conditionally copy original value.
+ ldr $rp,[x29,#96] // pull rp
+ add $tp,$tp,#8*8
+ subs $t0,$acc0,$a0
+ sbcs $t1,$acc1,$a1
+ sub $cnt,$num,#8*8
+ mov $ap_end,$rp // $rp copy
+
+.Lsqr8x_sub:
+ sbcs $t2,$acc2,$a2
+ ldp $a0,$a1,[$np,#8*0]
+ sbcs $t3,$acc3,$a3
+ stp $t0,$t1,[$rp,#8*0]
+ sbcs $t0,$acc4,$a4
+ ldp $a2,$a3,[$np,#8*2]
+ sbcs $t1,$acc5,$a5
+ stp $t2,$t3,[$rp,#8*2]
+ sbcs $t2,$acc6,$a6
+ ldp $a4,$a5,[$np,#8*4]
+ sbcs $t3,$acc7,$a7
+ ldp $a6,$a7,[$np,#8*6]
+ add $np,$np,#8*8
+ ldp $acc0,$acc1,[$tp,#8*0]
+ sub $cnt,$cnt,#8*8
+ ldp $acc2,$acc3,[$tp,#8*2]
+ ldp $acc4,$acc5,[$tp,#8*4]
+ ldp $acc6,$acc7,[$tp,#8*6]
+ add $tp,$tp,#8*8
+ stp $t0,$t1,[$rp,#8*4]
+ sbcs $t0,$acc0,$a0
+ stp $t2,$t3,[$rp,#8*6]
+ add $rp,$rp,#8*8
+ sbcs $t1,$acc1,$a1
+ cbnz $cnt,.Lsqr8x_sub
+
+ sbcs $t2,$acc2,$a2
+ mov $tp,sp
+ add $ap,sp,$num
+ ldp $a0,$a1,[$ap_end,#8*0]
+ sbcs $t3,$acc3,$a3
+ stp $t0,$t1,[$rp,#8*0]
+ sbcs $t0,$acc4,$a4
+ ldp $a2,$a3,[$ap_end,#8*2]
+ sbcs $t1,$acc5,$a5
+ stp $t2,$t3,[$rp,#8*2]
+ sbcs $t2,$acc6,$a6
+ ldp $acc0,$acc1,[$ap,#8*0]
+ sbcs $t3,$acc7,$a7
+ ldp $acc2,$acc3,[$ap,#8*2]
+ sbcs xzr,$topmost,xzr // did it borrow?
+ ldr x30,[x29,#8] // pull return address
+ stp $t0,$t1,[$rp,#8*4]
+ stp $t2,$t3,[$rp,#8*6]
+
+ sub $cnt,$num,#8*4
+.Lsqr4x_cond_copy:
+ sub $cnt,$cnt,#8*4
+ csel $t0,$acc0,$a0,lo
+ stp xzr,xzr,[$tp,#8*0]
+ csel $t1,$acc1,$a1,lo
+ ldp $a0,$a1,[$ap_end,#8*4]
+ ldp $acc0,$acc1,[$ap,#8*4]
+ csel $t2,$acc2,$a2,lo
+ stp xzr,xzr,[$tp,#8*2]
+ add $tp,$tp,#8*4
+ csel $t3,$acc3,$a3,lo
+ ldp $a2,$a3,[$ap_end,#8*6]
+ ldp $acc2,$acc3,[$ap,#8*6]
+ add $ap,$ap,#8*4
+ stp $t0,$t1,[$ap_end,#8*0]
+ stp $t2,$t3,[$ap_end,#8*2]
+ add $ap_end,$ap_end,#8*4
+ stp xzr,xzr,[$ap,#8*0]
+ stp xzr,xzr,[$ap,#8*2]
+ cbnz $cnt,.Lsqr4x_cond_copy
+
+ csel $t0,$acc0,$a0,lo
+ stp xzr,xzr,[$tp,#8*0]
+ csel $t1,$acc1,$a1,lo
+ stp xzr,xzr,[$tp,#8*2]
+ csel $t2,$acc2,$a2,lo
+ csel $t3,$acc3,$a3,lo
+ stp $t0,$t1,[$ap_end,#8*0]
+ stp $t2,$t3,[$ap_end,#8*2]
+
+ b .Lsqr8x_done
+
+.align 4
+.Lsqr8x8_post_condition:
+ adc $carry,xzr,xzr
+ ldr x30,[x29,#8] // pull return address
+ // $acc0-7,$carry hold result, $a0-7 hold modulus
+ subs $a0,$acc0,$a0
+ ldr $ap,[x29,#96] // pull rp
+ sbcs $a1,$acc1,$a1
+ stp xzr,xzr,[sp,#8*0]
+ sbcs $a2,$acc2,$a2
+ stp xzr,xzr,[sp,#8*2]
+ sbcs $a3,$acc3,$a3
+ stp xzr,xzr,[sp,#8*4]
+ sbcs $a4,$acc4,$a4
+ stp xzr,xzr,[sp,#8*6]
+ sbcs $a5,$acc5,$a5
+ stp xzr,xzr,[sp,#8*8]
+ sbcs $a6,$acc6,$a6
+ stp xzr,xzr,[sp,#8*10]
+ sbcs $a7,$acc7,$a7
+ stp xzr,xzr,[sp,#8*12]
+ sbcs $carry,$carry,xzr // did it borrow?
+ stp xzr,xzr,[sp,#8*14]
+
+ // $a0-7 hold result-modulus
+ csel $a0,$acc0,$a0,lo
+ csel $a1,$acc1,$a1,lo
+ csel $a2,$acc2,$a2,lo
+ csel $a3,$acc3,$a3,lo
+ stp $a0,$a1,[$ap,#8*0]
+ csel $a4,$acc4,$a4,lo
+ csel $a5,$acc5,$a5,lo
+ stp $a2,$a3,[$ap,#8*2]
+ csel $a6,$acc6,$a6,lo
+ csel $a7,$acc7,$a7,lo
+ stp $a4,$a5,[$ap,#8*4]
+ stp $a6,$a7,[$ap,#8*6]
+
+.Lsqr8x_done:
+ ldp x19,x20,[x29,#16]
+ mov sp,x29
+ ldp x21,x22,[x29,#32]
+ mov x0,#1
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldr x29,[sp],#128
+ ret
+.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
+___
+}
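The final-step pattern referenced above -- subtract the modulus unconditionally, then use the borrow to decide between the difference and the original value -- is the standard branch-free tail of Montgomery multiplication. A minimal C sketch of the idea (one word per loop iteration rather than four at a time; the names are illustrative, not the bn_asm.c interface):

    #include <stdint.h>

    /* rp[] = t[] - n[] if that does not borrow (taking the top-most
     * carry word into account), else rp[] = t[]; no data-dependent
     * branches, mirroring the sbcs/csel sequences above. */
    static void mont_final_sub(uint64_t *rp, const uint64_t *t,
                               const uint64_t *n, int num, uint64_t top)
    {
        uint64_t borrow = 0;
        for (int i = 0; i < num; i++) {
            rp[i] = t[i] - n[i] - borrow;
            borrow = (t[i] < n[i]) | ((t[i] == n[i]) & borrow);
        }
        borrow = (top < borrow);        /* sbcs xzr,$topmost,xzr */
        uint64_t mask = 0 - borrow;     /* all-ones iff it borrowed */
        for (int i = 0; i < num; i++)   /* csel ...,lo equivalent */
            rp[i] = (rp[i] & ~mask) | (t[i] & mask);
    }

The stp xzr,xzr stores interleaved with the copy serve a second purpose: they wipe the stack scratch area that held intermediate limbs.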
+
+{
+########################################################################
+# Even though this might look like an ARMv8 adaptation of mulx4x_mont
+# from the x86_64-mont5 module, it differs in that it performs
+# reduction 256 bits at a time, i.e. four 64-bit limbs per pass (a C
+# sketch of a single reduction step follows the function).
+
+my ($a0,$a1,$a2,$a3,
+ $t0,$t1,$t2,$t3,
+ $m0,$m1,$m2,$m3,
+ $acc0,$acc1,$acc2,$acc3,$acc4,
+ $bi,$mi,$tp,$ap_end,$cnt) = map("x$_",(6..17,19..28));
+my $bp_end=$rp;
+my ($carry,$topmost) = ($rp,"x30");
+
+$code.=<<___;
+.type __bn_mul4x_mont,%function
+.align 5
+__bn_mul4x_mont:
+ stp x29,x30,[sp,#-128]!
+ add x29,sp,#0
+ stp x19,x20,[sp,#16]
+ stp x21,x22,[sp,#32]
+ stp x23,x24,[sp,#48]
+ stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
+
+ sub $tp,sp,$num,lsl#3
+ lsl $num,$num,#3
+ ldr $n0,[$n0] // *n0
+ sub sp,$tp,#8*4 // alloca
+
+ add $t0,$bp,$num
+ add $ap_end,$ap,$num
+ stp $rp,$t0,[x29,#96] // offload rp and &b[num]
+
+ ldr $bi,[$bp,#8*0] // b[0]
+ ldp $a0,$a1,[$ap,#8*0] // a[0..3]
+ ldp $a2,$a3,[$ap,#8*2]
+ add $ap,$ap,#8*4
+ mov $acc0,xzr
+ mov $acc1,xzr
+ mov $acc2,xzr
+ mov $acc3,xzr
+ ldp $m0,$m1,[$np,#8*0] // n[0..3]
+ ldp $m2,$m3,[$np,#8*2]
+ adds $np,$np,#8*4 // clear carry bit
+ mov $carry,xzr
+ mov $cnt,#0
+ mov $tp,sp
+
+.Loop_mul4x_1st_reduction:
+ mul $t0,$a0,$bi // lo(a[0..3]*b[0])
+ adc $carry,$carry,xzr // modulo-scheduled
+ mul $t1,$a1,$bi
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$bi
+ and $cnt,$cnt,#31
+ mul $t3,$a3,$bi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$a0,$bi // hi(a[0..3]*b[0])
+ adcs $acc1,$acc1,$t1
+ mul $mi,$acc0,$n0 // t[0]*n0
+ adcs $acc2,$acc2,$t2
+ umulh $t1,$a1,$bi
+ adcs $acc3,$acc3,$t3
+ umulh $t2,$a2,$bi
+ adc $acc4,xzr,xzr
+ umulh $t3,$a3,$bi
+ ldr $bi,[$bp,$cnt] // next b[i] (or b[0])
+ adds $acc1,$acc1,$t0
+ // (*) mul $t0,$m0,$mi // lo(n[0..3]*t[0]*n0)
+ str $mi,[$tp],#8 // put aside t[0]*n0 for tail processing
+ adcs $acc2,$acc2,$t1
+ mul $t1,$m1,$mi
+ adcs $acc3,$acc3,$t2
+ mul $t2,$m2,$mi
+ adc $acc4,$acc4,$t3 // can't overflow
+ mul $t3,$m3,$mi
+ // (*) adds xzr,$acc0,$t0
+ subs xzr,$acc0,#1 // (*)
+ umulh $t0,$m0,$mi // hi(n[0..3]*t[0]*n0)
+ adcs $acc0,$acc1,$t1
+ umulh $t1,$m1,$mi
+ adcs $acc1,$acc2,$t2
+ umulh $t2,$m2,$mi
+ adcs $acc2,$acc3,$t3
+ umulh $t3,$m3,$mi
+ adcs $acc3,$acc4,$carry
+ adc $carry,xzr,xzr
+ adds $acc0,$acc0,$t0
+ sub $t0,$ap_end,$ap
+ adcs $acc1,$acc1,$t1
+ adcs $acc2,$acc2,$t2
+ adcs $acc3,$acc3,$t3
+ //adc $carry,$carry,xzr
+ cbnz $cnt,.Loop_mul4x_1st_reduction
+
+ cbz $t0,.Lmul4x4_post_condition
+
+ ldp $a0,$a1,[$ap,#8*0] // a[4..7]
+ ldp $a2,$a3,[$ap,#8*2]
+ add $ap,$ap,#8*4
+ ldr $mi,[sp] // a[0]*n0
+ ldp $m0,$m1,[$np,#8*0] // n[4..7]
+ ldp $m2,$m3,[$np,#8*2]
+ add $np,$np,#8*4
+
+.Loop_mul4x_1st_tail:
+ mul $t0,$a0,$bi // lo(a[4..7]*b[i])
+ adc $carry,$carry,xzr // modulo-scheduled
+ mul $t1,$a1,$bi
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$bi
+ and $cnt,$cnt,#31
+ mul $t3,$a3,$bi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$a0,$bi // hi(a[4..7]*b[i])
+ adcs $acc1,$acc1,$t1
+ umulh $t1,$a1,$bi
+ adcs $acc2,$acc2,$t2
+ umulh $t2,$a2,$bi
+ adcs $acc3,$acc3,$t3
+ umulh $t3,$a3,$bi
+ adc $acc4,xzr,xzr
+ ldr $bi,[$bp,$cnt] // next b[i] (or b[0])
+ adds $acc1,$acc1,$t0
+ mul $t0,$m0,$mi // lo(n[4..7]*a[0]*n0)
+ adcs $acc2,$acc2,$t1
+ mul $t1,$m1,$mi
+ adcs $acc3,$acc3,$t2
+ mul $t2,$m2,$mi
+ adc $acc4,$acc4,$t3 // can't overflow
+ mul $t3,$m3,$mi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$m0,$mi // hi(n[4..7]*a[0]*n0)
+ adcs $acc1,$acc1,$t1
+ umulh $t1,$m1,$mi
+ adcs $acc2,$acc2,$t2
+ umulh $t2,$m2,$mi
+ adcs $acc3,$acc3,$t3
+ adcs $acc4,$acc4,$carry
+ umulh $t3,$m3,$mi
+ adc $carry,xzr,xzr
+ ldr $mi,[sp,$cnt] // next t[0]*n0
+ str $acc0,[$tp],#8 // result!!!
+ adds $acc0,$acc1,$t0
+ sub $t0,$ap_end,$ap // done yet?
+ adcs $acc1,$acc2,$t1
+ adcs $acc2,$acc3,$t2
+ adcs $acc3,$acc4,$t3
+ //adc $carry,$carry,xzr
+ cbnz $cnt,.Loop_mul4x_1st_tail
+
+ sub $t1,$ap_end,$num // rewound $ap
+ cbz $t0,.Lmul4x_proceed
+
+ ldp $a0,$a1,[$ap,#8*0]
+ ldp $a2,$a3,[$ap,#8*2]
+ add $ap,$ap,#8*4
+ ldp $m0,$m1,[$np,#8*0]
+ ldp $m2,$m3,[$np,#8*2]
+ add $np,$np,#8*4
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Lmul4x_proceed:
+ ldr $bi,[$bp,#8*4]! // *++b
+ adc $topmost,$carry,xzr
+ ldp $a0,$a1,[$t1,#8*0] // a[0..3]
+ sub $np,$np,$num // rewind np
+ ldp $a2,$a3,[$t1,#8*2]
+ add $ap,$t1,#8*4
+
+ stp $acc0,$acc1,[$tp,#8*0] // result!!!
+ ldp $acc0,$acc1,[sp,#8*4] // t[0..3]
+ stp $acc2,$acc3,[$tp,#8*2] // result!!!
+ ldp $acc2,$acc3,[sp,#8*6]
+
+ ldp $m0,$m1,[$np,#8*0] // n[0..3]
+ mov $tp,sp
+ ldp $m2,$m3,[$np,#8*2]
+ adds $np,$np,#8*4 // clear carry bit
+ mov $carry,xzr
+
+.align 4
+.Loop_mul4x_reduction:
+ mul $t0,$a0,$bi // lo(a[0..3]*b[4])
+ adc $carry,$carry,xzr // modulo-scheduled
+ mul $t1,$a1,$bi
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$bi
+ and $cnt,$cnt,#31
+ mul $t3,$a3,$bi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$a0,$bi // hi(a[0..3]*b[4])
+ adcs $acc1,$acc1,$t1
+ mul $mi,$acc0,$n0 // t[0]*n0
+ adcs $acc2,$acc2,$t2
+ umulh $t1,$a1,$bi
+ adcs $acc3,$acc3,$t3
+ umulh $t2,$a2,$bi
+ adc $acc4,xzr,xzr
+ umulh $t3,$a3,$bi
+ ldr $bi,[$bp,$cnt] // next b[i]
+ adds $acc1,$acc1,$t0
+ // (*) mul $t0,$m0,$mi
+ str $mi,[$tp],#8 // put aside t[0]*n0 for tail processing
+ adcs $acc2,$acc2,$t1
+ mul $t1,$m1,$mi // lo(n[0..3]*t[0]*n0)
+ adcs $acc3,$acc3,$t2
+ mul $t2,$m2,$mi
+ adc $acc4,$acc4,$t3 // can't overflow
+ mul $t3,$m3,$mi
+ // (*) adds xzr,$acc0,$t0
+ subs xzr,$acc0,#1 // (*)
+ umulh $t0,$m0,$mi // hi(n[0..3]*t[0]*n0)
+ adcs $acc0,$acc1,$t1
+ umulh $t1,$m1,$mi
+ adcs $acc1,$acc2,$t2
+ umulh $t2,$m2,$mi
+ adcs $acc2,$acc3,$t3
+ umulh $t3,$m3,$mi
+ adcs $acc3,$acc4,$carry
+ adc $carry,xzr,xzr
+ adds $acc0,$acc0,$t0
+ adcs $acc1,$acc1,$t1
+ adcs $acc2,$acc2,$t2
+ adcs $acc3,$acc3,$t3
+ //adc $carry,$carry,xzr
+ cbnz $cnt,.Loop_mul4x_reduction
+
+ adc $carry,$carry,xzr
+ ldp $t0,$t1,[$tp,#8*4] // t[4..7]
+ ldp $t2,$t3,[$tp,#8*6]
+ ldp $a0,$a1,[$ap,#8*0] // a[4..7]
+ ldp $a2,$a3,[$ap,#8*2]
+ add $ap,$ap,#8*4
+ adds $acc0,$acc0,$t0
+ adcs $acc1,$acc1,$t1
+ adcs $acc2,$acc2,$t2
+ adcs $acc3,$acc3,$t3
+ //adc $carry,$carry,xzr
+
+ ldr $mi,[sp] // t[0]*n0
+ ldp $m0,$m1,[$np,#8*0] // n[4..7]
+ ldp $m2,$m3,[$np,#8*2]
+ add $np,$np,#8*4
+
+.align 4
+.Loop_mul4x_tail:
+ mul $t0,$a0,$bi // lo(a[4..7]*b[4])
+ adc $carry,$carry,xzr // modulo-scheduled
+ mul $t1,$a1,$bi
+ add $cnt,$cnt,#8
+ mul $t2,$a2,$bi
+ and $cnt,$cnt,#31
+ mul $t3,$a3,$bi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$a0,$bi // hi(a[4..7]*b[4])
+ adcs $acc1,$acc1,$t1
+ umulh $t1,$a1,$bi
+ adcs $acc2,$acc2,$t2
+ umulh $t2,$a2,$bi
+ adcs $acc3,$acc3,$t3
+ umulh $t3,$a3,$bi
+ adc $acc4,xzr,xzr
+ ldr $bi,[$bp,$cnt] // next b[i]
+ adds $acc1,$acc1,$t0
+ mul $t0,$m0,$mi // lo(n[4..7]*t[0]*n0)
+ adcs $acc2,$acc2,$t1
+ mul $t1,$m1,$mi
+ adcs $acc3,$acc3,$t2
+ mul $t2,$m2,$mi
+ adc $acc4,$acc4,$t3 // can't overflow
+ mul $t3,$m3,$mi
+ adds $acc0,$acc0,$t0
+ umulh $t0,$m0,$mi // hi(n[4..7]*t[0]*n0)
+ adcs $acc1,$acc1,$t1
+ umulh $t1,$m1,$mi
+ adcs $acc2,$acc2,$t2
+ umulh $t2,$m2,$mi
+ adcs $acc3,$acc3,$t3
+ umulh $t3,$m3,$mi
+ adcs $acc4,$acc4,$carry
+ ldr $mi,[sp,$cnt] // next a[0]*n0
+ adc $carry,xzr,xzr
+ str $acc0,[$tp],#8 // result!!!
+ adds $acc0,$acc1,$t0
+ sub $t0,$ap_end,$ap // done yet?
+ adcs $acc1,$acc2,$t1
+ adcs $acc2,$acc3,$t2
+ adcs $acc3,$acc4,$t3
+ //adc $carry,$carry,xzr
+ cbnz $cnt,.Loop_mul4x_tail
+
+ sub $t1,$np,$num // rewound np?
+ adc $carry,$carry,xzr
+ cbz $t0,.Loop_mul4x_break
+
+ ldp $t0,$t1,[$tp,#8*4]
+ ldp $t2,$t3,[$tp,#8*6]
+ ldp $a0,$a1,[$ap,#8*0]
+ ldp $a2,$a3,[$ap,#8*2]
+ add $ap,$ap,#8*4
+ adds $acc0,$acc0,$t0
+ adcs $acc1,$acc1,$t1
+ adcs $acc2,$acc2,$t2
+ adcs $acc3,$acc3,$t3
+ //adc $carry,$carry,xzr
+ ldp $m0,$m1,[$np,#8*0]
+ ldp $m2,$m3,[$np,#8*2]
+ add $np,$np,#8*4
+ b .Loop_mul4x_tail
+
+.align 4
+.Loop_mul4x_break:
+ ldp $t2,$t3,[x29,#96] // pull rp and &b[num]
+ adds $acc0,$acc0,$topmost
+ add $bp,$bp,#8*4 // bp++
+ adcs $acc1,$acc1,xzr
+ sub $ap,$ap,$num // rewind ap
+ adcs $acc2,$acc2,xzr
+ stp $acc0,$acc1,[$tp,#8*0] // result!!!
+ adcs $acc3,$acc3,xzr
+ ldp $acc0,$acc1,[sp,#8*4] // t[0..3]
+ adc $topmost,$carry,xzr
+ stp $acc2,$acc3,[$tp,#8*2] // result!!!
+ cmp $bp,$t3 // done yet?
+ ldp $acc2,$acc3,[sp,#8*6]
+ ldp $m0,$m1,[$t1,#8*0] // n[0..3]
+ ldp $m2,$m3,[$t1,#8*2]
+ add $np,$t1,#8*4
+ b.eq .Lmul4x_post
+
+ ldr $bi,[$bp]
+ ldp $a0,$a1,[$ap,#8*0] // a[0..3]
+ ldp $a2,$a3,[$ap,#8*2]
+ adds $ap,$ap,#8*4 // clear carry bit
+ mov $carry,xzr
+ mov $tp,sp
+ b .Loop_mul4x_reduction
+
+.align 4
+.Lmul4x_post:
+ // Final step. We check whether the result is larger than the
+ // modulus and, if it is, subtract the modulus. But comparison
+ // implies subtraction, so we subtract the modulus, check whether
+ // it borrowed, and conditionally copy back the original value
+ // (same conditional-copy idea as sketched after __bn_sqr8x_mont).
+ mov $rp,$t2
+ mov $ap_end,$t2 // $rp copy
+ subs $t0,$acc0,$m0
+ add $tp,sp,#8*8
+ sbcs $t1,$acc1,$m1
+ sub $cnt,$num,#8*4
+
+.Lmul4x_sub:
+ sbcs $t2,$acc2,$m2
+ ldp $m0,$m1,[$np,#8*0]
+ sub $cnt,$cnt,#8*4
+ ldp $acc0,$acc1,[$tp,#8*0]
+ sbcs $t3,$acc3,$m3
+ ldp $m2,$m3,[$np,#8*2]
+ add $np,$np,#8*4
+ ldp $acc2,$acc3,[$tp,#8*2]
+ add $tp,$tp,#8*4
+ stp $t0,$t1,[$rp,#8*0]
+ sbcs $t0,$acc0,$m0
+ stp $t2,$t3,[$rp,#8*2]
+ add $rp,$rp,#8*4
+ sbcs $t1,$acc1,$m1
+ cbnz $cnt,.Lmul4x_sub
+
+ sbcs $t2,$acc2,$m2
+ mov $tp,sp
+ add $ap,sp,#8*4
+ ldp $a0,$a1,[$ap_end,#8*0]
+ sbcs $t3,$acc3,$m3
+ stp $t0,$t1,[$rp,#8*0]
+ ldp $a2,$a3,[$ap_end,#8*2]
+ stp $t2,$t3,[$rp,#8*2]
+ ldp $acc0,$acc1,[$ap,#8*0]
+ ldp $acc2,$acc3,[$ap,#8*2]
+ sbcs xzr,$topmost,xzr // did it borrow?
+ ldr x30,[x29,#8] // pull return address
+
+ sub $cnt,$num,#8*4
+.Lmul4x_cond_copy:
+ sub $cnt,$cnt,#8*4
+ csel $t0,$acc0,$a0,lo
+ stp xzr,xzr,[$tp,#8*0]
+ csel $t1,$acc1,$a1,lo
+ ldp $a0,$a1,[$ap_end,#8*4]
+ ldp $acc0,$acc1,[$ap,#8*4]
+ csel $t2,$acc2,$a2,lo
+ stp xzr,xzr,[$tp,#8*2]
+ add $tp,$tp,#8*4
+ csel $t3,$acc3,$a3,lo
+ ldp $a2,$a3,[$ap_end,#8*6]
+ ldp $acc2,$acc3,[$ap,#8*6]
+ add $ap,$ap,#8*4
+ stp $t0,$t1,[$ap_end,#8*0]
+ stp $t2,$t3,[$ap_end,#8*2]
+ add $ap_end,$ap_end,#8*4
+ cbnz $cnt,.Lmul4x_cond_copy
+
+ csel $t0,$acc0,$a0,lo
+ stp xzr,xzr,[$tp,#8*0]
+ csel $t1,$acc1,$a1,lo
+ stp xzr,xzr,[$tp,#8*2]
+ csel $t2,$acc2,$a2,lo
+ stp xzr,xzr,[$tp,#8*3]
+ csel $t3,$acc3,$a3,lo
+ stp xzr,xzr,[$tp,#8*4]
+ stp $t0,$t1,[$ap_end,#8*0]
+ stp $t2,$t3,[$ap_end,#8*2]
+
+ b .Lmul4x_done
+
+.align 4
+.Lmul4x4_post_condition:
+ adc $carry,$carry,xzr
+ ldr $ap,[x29,#96] // pull rp
+ // $acc0-3,$carry hold result, $m0-3 hold modulus
+ subs $a0,$acc0,$m0
+ ldr x30,[x29,#8] // pull return address
+ sbcs $a1,$acc1,$m1
+ stp xzr,xzr,[sp,#8*0]
+ sbcs $a2,$acc2,$m2
+ stp xzr,xzr,[sp,#8*2]
+ sbcs $a3,$acc3,$m3
+ stp xzr,xzr,[sp,#8*4]
+ sbcs xzr,$carry,xzr // did it borrow?
+ stp xzr,xzr,[sp,#8*6]
+
+ // $a0-3 hold result-modulus
+ csel $a0,$acc0,$a0,lo
+ csel $a1,$acc1,$a1,lo
+ csel $a2,$acc2,$a2,lo
+ csel $a3,$acc3,$a3,lo
+ stp $a0,$a1,[$ap,#8*0]
+ stp $a2,$a3,[$ap,#8*2]
+
+.Lmul4x_done:
+ ldp x19,x20,[x29,#16]
+ mov sp,x29
+ ldp x21,x22,[x29,#32]
+ mov x0,#1
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
+ ldr x29,[sp],#128
+ ret
+.size __bn_mul4x_mont,.-__bn_mul4x_mont
+___
+}
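The loops above all revolve around one reduction step: choose mi = t[0]*n0 mod 2^64 so that adding mi*n[] clears the bottom limb, then slide the window down a limb. A single-limb-per-iteration C sketch under the usual Montgomery conventions (n[0] odd, n0 = -n[0]^{-1} mod 2^64); the assembly retires four limbs of n[] per pass and keeps the carries modulo-scheduled:

    #include <stdint.h>

    /* One textbook reduction step on the num-limb window t[]. */
    static void mont_reduce_step(uint64_t *t, const uint64_t *n,
                                 int num, uint64_t n0)
    {
        uint64_t mi = t[0] * n0;   /* t[0] + mi*n[0] == 0 mod 2^64 */
        uint64_t carry = 0;
        for (int i = 0; i < num; i++) {
            unsigned __int128 acc =
                (unsigned __int128)mi * n[i] + t[i] + carry;
            t[i] = (uint64_t)acc;  /* t[0] becomes zero by construction */
            carry = (uint64_t)(acc >> 64);
        }
        t[num] += carry;           /* the real code propagates this on */
        /* the window then shifts: t[0..num-1] = t[1..num] */
    }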
+$code.=<<___;
+.asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 4
+___
+
+print $code;
+
+close STDOUT;
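Both routines above replace the first low-half multiplication of each reduction round -- the commented-out "(*)" mul/adds pair -- with a single subs xzr,$acc0,#1. That works because mi is constructed so the low 64 bits of t[0] + mi*n[0] are always zero; the only information the skipped addition would produce is a carry equal to (t[0] != 0), which the subtraction reproduces in the flag. A self-contained C check of the identity (the modulus word is an arbitrary odd example):

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        uint64_t n = 0xffffffff00000001ULL;  /* any odd modulus word */
        uint64_t inv = n;        /* Newton iteration for n^{-1} mod 2^64 */
        for (int i = 0; i < 5; i++)          /* 3->6->12->24->48->96 bits */
            inv *= 2 - n * inv;
        uint64_t n0 = 0 - inv;               /* n0 = -n^{-1} mod 2^64 */

        for (uint64_t t0 = 0; t0 < (1u << 20); t0 += 911) {
            uint64_t mi = t0 * n0;           /* mul $mi,$acc0,$n0 */
            uint64_t lo = t0 + mi * n;       /* low limb of the (*) adds */
            assert(lo == 0);                 /* wraps to zero by design */
            assert((lo < t0) == (t0 != 0));  /* carry == subs xzr,$acc0,#1 */
        }
        return 0;
    }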
diff --git a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
index 332ef3e91d..1ca1bbf7d4 100644
--- a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
@@ -1,9 +1,19 @@
-#!/usr/local/bin/perl
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output = pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],$0);
$sse2=0;
@@ -21,6 +31,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&asm_finish();
+close STDOUT;
+
sub bn_mul_add_words
{
local($name)=@_;
@@ -771,4 +783,3 @@ sub bn_sub_part_words
&function_end($name);
}
-
diff --git a/deps/openssl/openssl/crypto/bn/asm/bn-c64xplus.asm b/deps/openssl/openssl/crypto/bn/asm/bn-c64xplus.asm
new file mode 100644
index 0000000000..de6d37728f
--- /dev/null
+++ b/deps/openssl/openssl/crypto/bn/asm/bn-c64xplus.asm
@@ -0,0 +1,382 @@
+;; Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+;;
+;; Licensed under the OpenSSL license (the "License"). You may not use
+;; this file except in compliance with the License. You can obtain a copy
+;; in the file LICENSE in the source distribution or at
+;; https://www.openssl.org/source/license.html
+;;
+;;====================================================================
+;; Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+;; project.
+;;
+;; Rights for redistribution and usage in source and binary forms are
+;; granted according to the OpenSSL license. Warranty of any kind is
+;; disclaimed.
+;;====================================================================
+;; A compiler-generated multiply-and-add SPLOOP runs at 12*n cycles, n
+;; being the number of 32-bit words, and addition at 8*n. The
+;; corresponding 4x unrolled SPLOOP-free loops run at ~8*n and ~5*n.
+;; The assembler SPLOOPs below spin at ... 2*n cycles [plus epilogue]
+;; (a C reference for the multiply-and-add loop is sketched at the
+;; end of this file).
+;;====================================================================
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .asg bn_mul_add_words,_bn_mul_add_words
+ .asg bn_mul_words,_bn_mul_words
+ .asg bn_sqr_words,_bn_sqr_words
+ .asg bn_add_words,_bn_add_words
+ .asg bn_sub_words,_bn_sub_words
+ .asg bn_div_words,_bn_div_words
+ .asg bn_sqr_comba8,_bn_sqr_comba8
+ .asg bn_mul_comba8,_bn_mul_comba8
+ .asg bn_sqr_comba4,_bn_sqr_comba4
+ .asg bn_mul_comba4,_bn_mul_comba4
+ .endif
+
+ .asg B3,RA
+ .asg A4,ARG0
+ .asg B4,ARG1
+ .asg A6,ARG2
+ .asg B6,ARG3
+ .asg A8,ARG4
+ .asg B8,ARG5
+ .asg A4,RET
+ .asg A15,FP
+ .asg B14,DP
+ .asg B15,SP
+
+ .global _bn_mul_add_words
+_bn_mul_add_words:
+ .asmfunc
+ MV ARG2,B0
+ [!B0] BNOP RA
+||[!B0] MVK 0,RET
+ [B0] MVC B0,ILC
+ [B0] ZERO A19 ; high part of accumulator
+|| [B0] MV ARG0,A2
+|| [B0] MV ARG3,A3
+ NOP 3
+
+ SPLOOP 2 ; 2*n+10
+;;====================================================================
+ LDW *ARG1++,B7 ; ap[i]
+ NOP 3
+ LDW *ARG0++,A7 ; rp[i]
+ MPY32U B7,A3,A17:A16
+ NOP 3 ; [2,0] in epilogue
+ ADDU A16,A7,A21:A20
+ ADDU A19,A21:A20,A19:A18
+|| MV.S A17,A23
+ SPKERNEL 2,1 ; leave slot for "return value"
+|| STW A18,*A2++ ; rp[i]
+|| ADD A19,A23,A19
+;;====================================================================
+ BNOP RA,4
+ MV A19,RET ; return value
+ .endasmfunc
+
+ .global _bn_mul_words
+_bn_mul_words:
+ .asmfunc
+ MV ARG2,B0
+ [!B0] BNOP RA
+||[!B0] MVK 0,RET
+ [B0] MVC B0,ILC
+ [B0] ZERO A19 ; high part of accumulator
+ NOP 3
+
+ SPLOOP 2 ; 2*n+10
+;;====================================================================
+ LDW *ARG1++,A7 ; ap[i]
+ NOP 4
+ MPY32U A7,ARG3,A17:A16
+ NOP 4 ; [2,0] in epilogue
+ ADDU A19,A16,A19:A18
+|| MV.S A17,A21
+ SPKERNEL 2,1 ; leave slot for "return value"
+|| STW A18,*ARG0++ ; rp[i]
+|| ADD.L A19,A21,A19
+;;====================================================================
+ BNOP RA,4
+ MV A19,RET ; return value
+ .endasmfunc
+
+ .global _bn_sqr_words
+_bn_sqr_words:
+ .asmfunc
+ MV ARG2,B0
+ [!B0] BNOP RA
+||[!B0] MVK 0,RET
+ [B0] MVC B0,ILC
+ [B0] MV ARG0,B2
+|| [B0] ADD 4,ARG0,ARG0
+ NOP 3
+
+ SPLOOP 2 ; 2*n+10
+;;====================================================================
+ LDW *ARG1++,B7 ; ap[i]
+ NOP 4
+ MPY32U B7,B7,B1:B0
+ NOP 3 ; [2,0] in epilogue
+ STW B0,*B2++(8) ; rp[2*i]
+ MV B1,A1
+ SPKERNEL 2,0 ; fully overlap BNOP RA,5
+|| STW A1,*ARG0++(8) ; rp[2*i+1]
+;;====================================================================
+ BNOP RA,5
+ .endasmfunc
+
+ .global _bn_add_words
+_bn_add_words:
+ .asmfunc
+ MV ARG3,B0
+ [!B0] BNOP RA
+||[!B0] MVK 0,RET
+ [B0] MVC B0,ILC
+ [B0] ZERO A1 ; carry flag
+|| [B0] MV ARG0,A3
+ NOP 3
+
+ SPLOOP 2 ; 2*n+6
+;;====================================================================
+ LDW *ARG2++,A7 ; bp[i]
+|| LDW *ARG1++,B7 ; ap[i]
+ NOP 4
+ ADDU A7,B7,A9:A8
+ ADDU A1,A9:A8,A1:A0
+ SPKERNEL 0,0 ; fully overlap BNOP RA,5
+|| STW A0,*A3++ ; write result
+|| MV A1,RET ; keep carry flag in RET
+;;====================================================================
+ BNOP RA,5
+ .endasmfunc
+
+ .global _bn_sub_words
+_bn_sub_words:
+ .asmfunc
+ MV ARG3,B0
+ [!B0] BNOP RA
+||[!B0] MVK 0,RET
+ [B0] MVC B0,ILC
+ [B0] ZERO A2 ; borrow flag
+|| [B0] MV ARG0,A3
+ NOP 3
+
+ SPLOOP 2 ; 2*n+6
+;;====================================================================
+ LDW *ARG2++,A7 ; bp[i]
+|| LDW *ARG1++,B7 ; ap[i]
+ NOP 4
+ SUBU B7,A7,A1:A0
+ [A2] SUB A1:A0,1,A1:A0
+ SPKERNEL 0,1 ; leave slot for "return borrow flag"
+|| STW A0,*A3++ ; write result
+|| AND 1,A1,A2 ; pass on borrow flag
+;;====================================================================
+ BNOP RA,4
+ AND 1,A1,RET ; return borrow flag
+ .endasmfunc
+
+ .global _bn_div_words
+_bn_div_words:
+ .asmfunc
+ LMBD 1,A6,A0 ; leading zero bits in dv
+ LMBD 1,A4,A1 ; leading zero bits in hi
+|| MVK 32,B0
+ CMPLTU A1,A0,A2
+|| ADD A0,B0,B0
+ [ A2] BNOP RA
+||[ A2] MVK -1,A4 ; return overflow
+||[!A2] MV A4,A3 ; reassign hi
+ [!A2] MV B4,A4 ; reassign lo, will be quotient
+||[!A2] MVC B0,ILC
+ [!A2] SHL A6,A0,A6 ; normalize dv
+|| MVK 1,A1
+
+ [!A2] CMPLTU A3,A6,A1 ; hi<dv?
+||[!A2] SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4
+ [!A2] SHRU A3,31,A1 ; upper bit
+||[!A2] ADDAH A5,A3,A3 ; hi<<1|lo>>31
+
+ SPLOOP 3
+ [!A1] CMPLTU A3,A6,A1 ; hi<dv?
+||[ A1] ZERO A1
+|| SHL A4,1,A5:A4 ; lo<<1
+ [!A1] SUB A3,A6,A3 ; hi-=dv
+||[!A1] OR 1,A4,A4 ; quotient
+ SHRU A3,31,A1 ; upper bit
+|| ADDAH A5,A3,A3 ; hi<<1|lo>>31
+ SPKERNEL
+
+ BNOP RA,5
+ .endasmfunc
+
+;;====================================================================
+;; Not really the Comba algorithm, just straightforward NxM... Dedicated,
+;; fully unrolled real Comba implementations are asymptotically 2x
+;; faster, but a naturally larger undertaking. The purpose of this
+;; exercise was rather to learn to master nested SPLOOPs...
+;;====================================================================
+ .global _bn_sqr_comba8
+ .global _bn_mul_comba8
+_bn_sqr_comba8:
+ MV ARG1,ARG2
+_bn_mul_comba8:
+ .asmfunc
+ MVK 8,B0 ; N, RILC
+|| MVK 8,A0 ; M, outer loop counter
+|| MV ARG1,A5 ; copy ap
+|| MV ARG0,B4 ; copy rp
+|| ZERO B19 ; high part of accumulator
+ MVC B0,RILC
+|| SUB B0,2,B1 ; N-2, initial ILC
+|| SUB B0,1,B2 ; const B2=N-1
+|| LDW *A5++,B6 ; ap[0]
+|| MV A0,A3 ; const A3=M
+sploopNxM?: ; for best performance arrange M<=N
+ [A0] SPLOOPD 2 ; 2*n+10
+|| MVC B1,ILC
+|| ADDAW B4,B0,B5
+|| ZERO B7
+|| LDW *A5++,A9 ; pre-fetch ap[1]
+|| ZERO A1
+|| SUB A0,1,A0
+;;====================================================================
+;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files.
+;; This is because of Advisory 15 from TI publication SPRZ247I.
+ LDW *ARG2++,A7 ; bp[i]
+ NOP 3
+ [A1] LDW *B5++,B7 ; rp[i]
+ MPY32U A7,B6,B17:B16
+ NOP 3
+ ADDU B16,B7,B21:B20
+ ADDU B19,B21:B20,B19:B18
+|| MV.S B17,B23
+ SPKERNEL
+|| STW B18,*B4++ ; rp[i]
+|| ADD.S B19,B23,B19
+;;====================================================================
+outer?: ; m*2*(n+1)+10
+ SUBAW ARG2,A3,ARG2 ; rewind bp to bp[0]
+ SPMASKR
+|| CMPGT A0,1,A2 ; done pre-fetching ap[i+1]?
+ MVD A9,B6 ; move through .M unit(*)
+ [A2] LDW *A5++,A9 ; pre-fetch ap[i+1]
+ SUBAW B5,B2,B5 ; rewind rp to rp[1]
+ MVK 1,A1
+ [A0] BNOP.S1 outer?,4
+|| [A0] SUB.L A0,1,A0
+ STW B19,*B4--[B2] ; rewind rp to rp[1]
+|| ZERO.S B19 ; high part of accumulator
+;; end of outer?
+ BNOP RA,5 ; return
+ .endasmfunc
+;; (*) It should be noted that B6 is used as input to MPY32U in the
+;; chronologically next cycle in the *preceding* SPLOOP iteration.
+;; Normally such an arrangement would require DINT, but at this
+;; point the SPLOOP is draining and interrupts are implicitly
+;; disabled.
+
+ .global _bn_sqr_comba4
+ .global _bn_mul_comba4
+_bn_sqr_comba4:
+ MV ARG1,ARG2
+_bn_mul_comba4:
+ .asmfunc
+ .if 0
+ BNOP sploopNxM?,3
+ ;; The above-mentioned m*2*(n+1)+10 does not apply in the n=m=4 case
+ ;; because of a low-counter effect: the prologue phase finishes
+ ;; before the SPKERNEL instruction is reached. As a result it's 25%
+ ;; slower than expected...
+ MVK 4,B0 ; N, RILC
+|| MVK 4,A0 ; M, outer loop counter
+|| MV ARG1,A5 ; copy ap
+|| MV ARG0,B4 ; copy rp
+|| ZERO B19 ; high part of accumulator
+ MVC B0,RILC
+|| SUB B0,2,B1 ; first ILC
+|| SUB B0,1,B2 ; const B2=N-1
+|| LDW *A5++,B6 ; ap[0]
+|| MV A0,A3 ; const A3=M
+ .else
+ ;; This alternative is an exercise in a fully unrolled Comba
+ ;; algorithm implementation that operates in n*(n+1)+12 cycles,
+ ;; or as few as 32 cycles...
+ LDW *ARG1[0],B16 ; a[0]
+|| LDW *ARG2[0],A16 ; b[0]
+ LDW *ARG1[1],B17 ; a[1]
+|| LDW *ARG2[1],A17 ; b[1]
+ LDW *ARG1[2],B18 ; a[2]
+|| LDW *ARG2[2],A18 ; b[2]
+ LDW *ARG1[3],B19 ; a[3]
+|| LDW *ARG2[3],A19 ; b[3]
+ NOP
+ MPY32U A16,B16,A1:A0 ; a[0]*b[0]
+ MPY32U A17,B16,A23:A22 ; a[0]*b[1]
+ MPY32U A16,B17,A25:A24 ; a[1]*b[0]
+ MPY32U A16,B18,A27:A26 ; a[2]*b[0]
+ STW A0,*ARG0[0]
+|| MPY32U A17,B17,A29:A28 ; a[1]*b[1]
+ MPY32U A18,B16,A31:A30 ; a[0]*b[2]
+|| ADDU A22,A1,A1:A0
+ MV A23,B0
+|| MPY32U A19,B16,A21:A20 ; a[3]*b[0]
+|| ADDU A24,A1:A0,A1:A0
+ ADDU A25,B0,B1:B0
+|| STW A0,*ARG0[1]
+|| MPY32U A18,B17,A23:A22 ; a[2]*b[1]
+|| ADDU A26,A1,A9:A8
+ ADDU A27,B1,B9:B8
+|| MPY32U A17,B18,A25:A24 ; a[1]*b[2]
+|| ADDU A28,A9:A8,A9:A8
+ ADDU A29,B9:B8,B9:B8
+|| MPY32U A16,B19,A27:A26 ; a[0]*b[3]
+|| ADDU A30,A9:A8,A9:A8
+ ADDU A31,B9:B8,B9:B8
+|| ADDU B0,A9:A8,A9:A8
+ STW A8,*ARG0[2]
+|| ADDU A20,A9,A1:A0
+ ADDU A21,B9,B1:B0
+|| MPY32U A19,B17,A21:A20 ; a[3]*b[1]
+|| ADDU A22,A1:A0,A1:A0
+ ADDU A23,B1:B0,B1:B0
+|| MPY32U A18,B18,A23:A22 ; a[2]*b[2]
+|| ADDU A24,A1:A0,A1:A0
+ ADDU A25,B1:B0,B1:B0
+|| MPY32U A17,B19,A25:A24 ; a[1]*b[3]
+|| ADDU A26,A1:A0,A1:A0
+ ADDU A27,B1:B0,B1:B0
+|| ADDU B8,A1:A0,A1:A0
+ STW A0,*ARG0[3]
+|| MPY32U A19,B18,A27:A26 ; a[3]*b[2]
+|| ADDU A20,A1,A9:A8
+ ADDU A21,B1,B9:B8
+|| MPY32U A18,B19,A29:A28 ; a[2]*b[3]
+|| ADDU A22,A9:A8,A9:A8
+ ADDU A23,B9:B8,B9:B8
+|| MPY32U A19,B19,A31:A30 ; a[3]*b[3]
+|| ADDU A24,A9:A8,A9:A8
+ ADDU A25,B9:B8,B9:B8
+|| ADDU B0,A9:A8,A9:A8
+ STW A8,*ARG0[4]
+|| ADDU A26,A9,A1:A0
+ ADDU A27,B9,B1:B0
+|| ADDU A28,A1:A0,A1:A0
+ ADDU A29,B1:B0,B1:B0
+|| BNOP RA
+|| ADDU B8,A1:A0,A1:A0
+ STW A0,*ARG0[5]
+|| ADDU A30,A1,A9:A8
+ ADD A31,B1,B8
+ ADDU B0,A9:A8,A9:A8 ; removed || to avoid cross-path stall below
+ ADD B8,A9,A9
+|| STW A8,*ARG0[6]
+ STW A9,*ARG0[7]
+ .endif
+ .endasmfunc
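For reference, the contract all of these SPLOOP kernels implement is the portable word loop of crypto/bn/bn_asm.c. A simplified 32-bit rendition (BN_ULONG is 32 bits on C64x+), with the running carry kept in the top half of a 64-bit accumulator much as A19 carries it above:

    #include <stdint.h>

    /* rp[] += ap[] * w; returns the final carry word. */
    static uint32_t mul_add_words_ref(uint32_t *rp, const uint32_t *ap,
                                      int num, uint32_t w)
    {
        uint64_t acc = 0;
        for (int i = 0; i < num; i++) {
            acc += (uint64_t)ap[i] * w + rp[i];  /* MPY32U + two ADDUs */
            rp[i] = (uint32_t)acc;               /* low half back to rp[i] */
            acc >>= 32;                          /* high half rides along */
        }
        return (uint32_t)acc;
    }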
diff --git a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
new file mode 100644
index 0000000000..c0e5400807
--- /dev/null
+++ b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
@@ -0,0 +1,160 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# February 2012
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication
+# used in bn_gf2m.c. It's a kind of low-hanging mechanical port from
+# C for the time being... The subroutine runs in 37 cycles, which is
+# 4.5x faster than compiler-generated code, though the comparison is
+# totally unfair because this module utilizes the Galois Field
+# Multiply instruction. (A C sketch of the Karatsuba combine used
+# below follows at the end of this file.)
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector
+
+($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20));
+($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20));
+($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7");
+($A,$B)=($Alo,$B_1);
+$xFF="B1";
+
+sub mul_1x1_upper {
+my ($A,$B)=@_;
+$code.=<<___;
+ EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
+|| AND $B,$xFF,$B_0
+|| SHRU $B,24,$B_3
+ SHRU $A,16, $Ahi ; smash $A to two halfwords
+|| EXTU $A,16,16,$Alo
+
+ XORMPY $Alo,$B_2,$Alox2 ; 16x8-bit multiplication
+|| XORMPY $Ahi,$B_2,$Ahix2
+|| EXTU $B,16,24,$B_1
+ XORMPY $Alo,$B_0,$Alox0
+|| XORMPY $Ahi,$B_0,$Ahix0
+ XORMPY $Alo,$B_3,$Alox3
+|| XORMPY $Ahi,$B_3,$Ahix3
+ XORMPY $Alo,$B_1,$Alox1
+|| XORMPY $Ahi,$B_1,$Ahix1
+___
+}
+sub mul_1x1_merged {
+my ($OUTlo,$OUThi,$A,$B)=@_;
+$code.=<<___;
+ EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
+|| AND $B,$xFF,$B_0
+|| SHRU $B,24,$B_3
+ SHRU $A,16, $Ahi ; smash $A to two halfwords
+|| EXTU $A,16,16,$Alo
+
+ XOR $Ahix0,$Alox2,$Ahix0
+|| MV $Ahix2,$OUThi
+|| XORMPY $Alo,$B_2,$Alox2
+ XORMPY $Ahi,$B_2,$Ahix2
+|| EXTU $B,16,24,$B_1
+|| XORMPY $Alo,$B_0,A1 ; $Alox0
+ XOR $Ahix1,$Alox3,$Ahix1
+|| SHL $Ahix0,16,$OUTlo
+|| SHRU $Ahix0,16,$Ahix0
+ XOR $Alox0,$OUTlo,$OUTlo
+|| XOR $Ahix0,$OUThi,$OUThi
+|| XORMPY $Ahi,$B_0,$Ahix0
+|| XORMPY $Alo,$B_3,$Alox3
+|| SHL $Alox1,8,$Alox1
+|| SHL $Ahix3,8,$Ahix3
+ XOR $Alox1,$OUTlo,$OUTlo
+|| XOR $Ahix3,$OUThi,$OUThi
+|| XORMPY $Ahi,$B_3,$Ahix3
+|| SHL $Ahix1,24,$Alox1
+|| SHRU $Ahix1,8, $Ahix1
+ XOR $Alox1,$OUTlo,$OUTlo
+|| XOR $Ahix1,$OUThi,$OUThi
+|| XORMPY $Alo,$B_1,$Alox1
+|| XORMPY $Ahi,$B_1,$Ahix1
+|| MV A1,$Alox0
+___
+}
+sub mul_1x1_lower {
+my ($OUTlo,$OUThi)=@_;
+$code.=<<___;
+ ;NOP
+ XOR $Ahix0,$Alox2,$Ahix0
+|| MV $Ahix2,$OUThi
+ NOP
+ XOR $Ahix1,$Alox3,$Ahix1
+|| SHL $Ahix0,16,$OUTlo
+|| SHRU $Ahix0,16,$Ahix0
+ XOR $Alox0,$OUTlo,$OUTlo
+|| XOR $Ahix0,$OUThi,$OUThi
+|| SHL $Alox1,8,$Alox1
+|| SHL $Ahix3,8,$Ahix3
+ XOR $Alox1,$OUTlo,$OUTlo
+|| XOR $Ahix3,$OUThi,$OUThi
+|| SHL $Ahix1,24,$Alox1
+|| SHRU $Ahix1,8, $Ahix1
+ XOR $Alox1,$OUTlo,$OUTlo
+|| XOR $Ahix1,$OUThi,$OUThi
+___
+}
+$code.=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
+ .endif
+
+ .global _bn_GF2m_mul_2x2
+_bn_GF2m_mul_2x2:
+ .asmfunc
+ MVK 0xFF,$xFF
+___
+ &mul_1x1_upper($a0,$b0); # a0·b0
+$code.=<<___;
+|| MV $b1,$B
+ MV $a1,$A
+___
+ &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
+$code.=<<___;
+|| XOR $b0,$b1,$B
+ XOR $a0,$a1,$A
+___
+ &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
+$code.=<<___;
+ XOR A28,A31,A29
+|| XOR B28,B31,B29 ; a0·b0+a1·b1
+___
+ &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
+$code.=<<___;
+|| BNOP B3
+ XOR A29,A30,A30
+|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
+ XOR B28,A30,A30
+|| STW A28,*${rp}[0]
+ XOR B30,A31,A31
+|| STW A30,*${rp}[1]
+ STW A31,*${rp}[2]
+ STW B31,*${rp}[3]
+ .endasmfunc
+___
+
+print $code;
+close STDOUT;
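The three mul_1x1 passes above are one level of Karatsuba over GF(2): compute a0·b0, a1·b1 and (a0^a1)·(b0^b1), then combine with XORs, since polynomial addition and subtraction are both XOR. A portable C sketch of the scheme for 32-bit words; the shift-and-xor clmul32 below stands in for the hardware XORMPY splits:

    #include <stdint.h>

    /* Carryless (GF(2)[x]) 32x32->64 multiply, portable stand-in
     * for the Galois Field Multiply hardware used above. */
    static uint64_t clmul32(uint32_t a, uint32_t b)
    {
        uint64_t r = 0;
        for (int i = 0; i < 32; i++)
            if ((b >> i) & 1)
                r ^= (uint64_t)a << i;
        return r;
    }

    /* 64x64 polynomial product from three 32x32 products:
     * Karatsuba where "+" and "-" are both XOR. */
    static void gf2m_mul_2x2(uint64_t r[2], uint32_t a1, uint32_t a0,
                             uint32_t b1, uint32_t b0)
    {
        uint64_t lo  = clmul32(a0, b0);
        uint64_t hi  = clmul32(a1, b1);
        uint64_t mid = clmul32(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;
        r[0] = lo ^ (mid << 32);   /* low two result words */
        r[1] = hi ^ (mid >> 32);   /* high two result words */
    }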
diff --git a/deps/openssl/openssl/crypto/bn/asm/co-586.pl b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
index 57101a6bd7..60d0363660 100644
--- a/deps/openssl/openssl/crypto/bn/asm/co-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
@@ -1,9 +1,18 @@
-#!/usr/local/bin/perl
+#! /usr/bin/env perl
+# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output = pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],$0);
&bn_mul_comba("bn_mul_comba8",8);
@@ -13,6 +22,8 @@ require "x86asm.pl";
&asm_finish();
+close STDOUT;
+
sub mul_add_c
{
local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
index e258658428..5cc5c599f9 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -60,6 +67,8 @@
# hereafter less for longer keys, while verify - by 74-13%.
# DSA performance improves by 115-30%.
+$output=pop;
+
if ($^O eq "hpux") {
$ADDP="addp4";
for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
@@ -846,6 +855,6 @@ copyright:
stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>"
___
-$output=shift and open STDOUT,">$output";
+open STDOUT,">$output" if $output;
print $code;
close STDOUT;
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64.S b/deps/openssl/openssl/crypto/bn/asm/ia64.S
index a9a42abfc3..f2404a3c1e 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64.S
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64.S
@@ -3,6 +3,13 @@
.ident "ia64.S, Version 2.1"
.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+// Copyright 2001-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License"). You may not use
+// this file except in compliance with the License. You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
//
// ====================================================================
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -22,7 +29,7 @@
// ports is the same, i.e. 2, while I need 4. In other words, to this
// module Itanium2 remains effectively as "wide" as Itanium. Yet it's
// essentially different in respect to this module, and a re-tune was
-// required. Well, because some intruction latencies has changed. Most
+// required. Well, because some instruction latencies have changed. Most
// noticeably those intensively used:
//
// Itanium Itanium2
@@ -363,7 +370,7 @@ bn_mul_words:
// The loop therefore spins at the latency of xma minus 1, or in other
// words at 6*(n+4) ticks:-( Compare to the "production" loop above
// that runs in 2*(n+11) where the low latency problem is worked around
-// by moving the dependency to one-tick latent interger ALU. Note that
+// by moving the dependency to one-tick latent integer ALU. Note that
// "distance" between ldf8 and xma is not latency of ldf8, but the
// *difference* between xma and ldf8 latencies.
.L_bn_mul_words_ctop:
@@ -425,7 +432,7 @@ bn_mul_add_words:
// version was performing *all* additions in IALU and was starving
// for those even on Itanium 2. In this version one addition is
// moved to FPU and is folded with multiplication. This is at cost
-// of propogating the result from previous call to this subroutine
+// of propagating the result from previous call to this subroutine
// to L2 cache... In other words negligible even for shorter keys.
// *Overall* performance improvement [over previous version] varies
// from 11 to 22 percent depending on key length.
@@ -495,7 +502,7 @@ bn_sqr_words:
// scalability. The decision will very likely be reconsidered after the
// benchmark program is profiled. I.e. if performance gain on Itanium
// will appear larger than loss on "wider" IA-64, then the loop should
-// be explicitely split and the epilogue compressed.
+// be explicitly split and the epilogue compressed.
.L_bn_sqr_words_ctop:
{ .mfi; (p16) ldf8 f32=[r33],8
(p25) xmpy.lu f42=f41,f41
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
index a33cdf4111..a907571bec 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -67,7 +74,7 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
#
######################################################################
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
if ($flavour =~ /64|n32/i) {
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips.pl b/deps/openssl/openssl/crypto/bn/asm/mips.pl
index acafde5e56..420f01f3a4 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -15,7 +22,7 @@
# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
#
# The module is designed to work with either of the "new" MIPS ABI(5),
-# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
+# namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
# IRIX 5.x not only because it doesn't support new ABIs but also
# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
# 64-bit instructions (daddu, dmultu, etc.) found below gonna only
@@ -49,7 +56,7 @@
# key length, more for longer keys.
$flavour = shift || "o32";
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
if ($flavour =~ /64|n32/i) {
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips3-mont.pl
deleted file mode 100644
index 8f9156e02a..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/mips3-mont.pl
+++ /dev/null
@@ -1,327 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# This module doesn't present direct interest for OpenSSL, because it
-# doesn't provide better performance for longer keys. While 512-bit
-# RSA private key operations are 40% faster, 1024-bit ones are hardly
-# faster at all, while longer key operations are slower by up to 20%.
-# It might be of interest to embedded system developers though, as
-# it's smaller than 1KB, yet offers ~3x improvement over compiler
-# generated code.
-#
-# The module targets N32 and N64 MIPS ABIs and currently is a bit
-# IRIX-centric, i.e. is likely to require adaptation for other OSes.
-
-# int bn_mul_mont(
-$rp="a0"; # BN_ULONG *rp,
-$ap="a1"; # const BN_ULONG *ap,
-$bp="a2"; # const BN_ULONG *bp,
-$np="a3"; # const BN_ULONG *np,
-$n0="a4"; # const BN_ULONG *n0,
-$num="a5"; # int num);
-
-$lo0="a6";
-$hi0="a7";
-$lo1="v0";
-$hi1="v1";
-$aj="t0";
-$bi="t1";
-$nj="t2";
-$tp="t3";
-$alo="s0";
-$ahi="s1";
-$nlo="s2";
-$nhi="s3";
-$tj="s4";
-$i="s5";
-$j="s6";
-$fp="t8";
-$m1="t9";
-
-$FRAME=8*(2+8);
-
-$code=<<___;
-#include <asm.h>
-#include <regdef.h>
-
-.text
-
-.set noat
-.set reorder
-
-.align 5
-.globl bn_mul_mont
-.ent bn_mul_mont
-bn_mul_mont:
- .set noreorder
- PTR_SUB sp,64
- move $fp,sp
- .frame $fp,64,ra
- slt AT,$num,4
- li v0,0
- beqzl AT,.Lproceed
- nop
- jr ra
- PTR_ADD sp,$fp,64
- .set reorder
-.align 5
-.Lproceed:
- ld $n0,0($n0)
- ld $bi,0($bp) # bp[0]
- ld $aj,0($ap) # ap[0]
- ld $nj,0($np) # np[0]
- PTR_SUB sp,16 # place for two extra words
- sll $num,3
- li AT,-4096
- PTR_SUB sp,$num
- and sp,AT
-
- sd s0,0($fp)
- sd s1,8($fp)
- sd s2,16($fp)
- sd s3,24($fp)
- sd s4,32($fp)
- sd s5,40($fp)
- sd s6,48($fp)
- sd s7,56($fp)
-
- dmultu $aj,$bi
- ld $alo,8($ap)
- ld $nlo,8($np)
- mflo $lo0
- mfhi $hi0
- dmultu $lo0,$n0
- mflo $m1
-
- dmultu $alo,$bi
- mflo $alo
- mfhi $ahi
-
- dmultu $nj,$m1
- mflo $lo1
- mfhi $hi1
- dmultu $nlo,$m1
- daddu $lo1,$lo0
- sltu AT,$lo1,$lo0
- daddu $hi1,AT
- mflo $nlo
- mfhi $nhi
-
- move $tp,sp
- li $j,16
-.align 4
-.L1st:
- .set noreorder
- PTR_ADD $aj,$ap,$j
- ld $aj,($aj)
- PTR_ADD $nj,$np,$j
- ld $nj,($nj)
-
- dmultu $aj,$bi
- daddu $lo0,$alo,$hi0
- daddu $lo1,$nlo,$hi1
- sltu AT,$lo0,$hi0
- sltu s7,$lo1,$hi1
- daddu $hi0,$ahi,AT
- daddu $hi1,$nhi,s7
- mflo $alo
- mfhi $ahi
-
- daddu $lo1,$lo0
- sltu AT,$lo1,$lo0
- dmultu $nj,$m1
- daddu $hi1,AT
- addu $j,8
- sd $lo1,($tp)
- sltu s7,$j,$num
- mflo $nlo
- mfhi $nhi
-
- bnez s7,.L1st
- PTR_ADD $tp,8
- .set reorder
-
- daddu $lo0,$alo,$hi0
- sltu AT,$lo0,$hi0
- daddu $hi0,$ahi,AT
-
- daddu $lo1,$nlo,$hi1
- sltu s7,$lo1,$hi1
- daddu $hi1,$nhi,s7
- daddu $lo1,$lo0
- sltu AT,$lo1,$lo0
- daddu $hi1,AT
-
- sd $lo1,($tp)
-
- daddu $hi1,$hi0
- sltu AT,$hi1,$hi0
- sd $hi1,8($tp)
- sd AT,16($tp)
-
- li $i,8
-.align 4
-.Louter:
- PTR_ADD $bi,$bp,$i
- ld $bi,($bi)
- ld $aj,($ap)
- ld $alo,8($ap)
- ld $tj,(sp)
-
- dmultu $aj,$bi
- ld $nj,($np)
- ld $nlo,8($np)
- mflo $lo0
- mfhi $hi0
- daddu $lo0,$tj
- dmultu $lo0,$n0
- sltu AT,$lo0,$tj
- daddu $hi0,AT
- mflo $m1
-
- dmultu $alo,$bi
- mflo $alo
- mfhi $ahi
-
- dmultu $nj,$m1
- mflo $lo1
- mfhi $hi1
-
- dmultu $nlo,$m1
- daddu $lo1,$lo0
- sltu AT,$lo1,$lo0
- daddu $hi1,AT
- mflo $nlo
- mfhi $nhi
-
- move $tp,sp
- li $j,16
- ld $tj,8($tp)
-.align 4
-.Linner:
- .set noreorder
- PTR_ADD $aj,$ap,$j
- ld $aj,($aj)
- PTR_ADD $nj,$np,$j
- ld $nj,($nj)
-
- dmultu $aj,$bi
- daddu $lo0,$alo,$hi0
- daddu $lo1,$nlo,$hi1
- sltu AT,$lo0,$hi0
- sltu s7,$lo1,$hi1
- daddu $hi0,$ahi,AT
- daddu $hi1,$nhi,s7
- mflo $alo
- mfhi $ahi
-
- daddu $lo0,$tj
- addu $j,8
- dmultu $nj,$m1
- sltu AT,$lo0,$tj
- daddu $lo1,$lo0
- daddu $hi0,AT
- sltu s7,$lo1,$lo0
- ld $tj,16($tp)
- daddu $hi1,s7
- sltu AT,$j,$num
- mflo $nlo
- mfhi $nhi
- sd $lo1,($tp)
- bnez AT,.Linner
- PTR_ADD $tp,8
- .set reorder
-
- daddu $lo0,$alo,$hi0
- sltu AT,$lo0,$hi0
- daddu $hi0,$ahi,AT
- daddu $lo0,$tj
- sltu s7,$lo0,$tj
- daddu $hi0,s7
-
- ld $tj,16($tp)
- daddu $lo1,$nlo,$hi1
- sltu AT,$lo1,$hi1
- daddu $hi1,$nhi,AT
- daddu $lo1,$lo0
- sltu s7,$lo1,$lo0
- daddu $hi1,s7
- sd $lo1,($tp)
-
- daddu $lo1,$hi1,$hi0
- sltu $hi1,$lo1,$hi0
- daddu $lo1,$tj
- sltu AT,$lo1,$tj
- daddu $hi1,AT
- sd $lo1,8($tp)
- sd $hi1,16($tp)
-
- addu $i,8
- sltu s7,$i,$num
- bnez s7,.Louter
-
- .set noreorder
- PTR_ADD $tj,sp,$num # &tp[num]
- move $tp,sp
- move $ap,sp
- li $hi0,0 # clear borrow bit
-
-.align 4
-.Lsub: ld $lo0,($tp)
- ld $lo1,($np)
- PTR_ADD $tp,8
- PTR_ADD $np,8
- dsubu $lo1,$lo0,$lo1 # tp[i]-np[i]
- sgtu AT,$lo1,$lo0
- dsubu $lo0,$lo1,$hi0
- sgtu $hi0,$lo0,$lo1
- sd $lo0,($rp)
- or $hi0,AT
- sltu AT,$tp,$tj
- bnez AT,.Lsub
- PTR_ADD $rp,8
-
- dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit
- move $tp,sp
- PTR_SUB $rp,$num # restore rp
- not $hi1,$hi0
-
- and $ap,$hi0,sp
- and $bp,$hi1,$rp
- or $ap,$ap,$bp # ap=borrow?tp:rp
-
-.align 4
-.Lcopy: ld $aj,($ap)
- PTR_ADD $ap,8
- PTR_ADD $tp,8
- sd zero,-8($tp)
- sltu AT,$tp,$tj
- sd $aj,($rp)
- bnez AT,.Lcopy
- PTR_ADD $rp,8
-
- ld s0,0($fp)
- ld s1,8($fp)
- ld s2,16($fp)
- ld s3,24($fp)
- ld s4,32($fp)
- ld s5,40($fp)
- ld s6,48($fp)
- ld s7,56($fp)
- li v0,1
- jr ra
- PTR_ADD sp,$fp,64
- .set reorder
-END(bn_mul_mont)
-.rdata
-.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
-___
-
-print $code;
-close STDOUT;
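A recurring idiom in the file deleted above: MIPS has no flags register, so carries are synthesized with sltu -- an unsigned sum wraps exactly when it comes out smaller than one of its addends. A C rendering of the daddu/sltu pattern (names illustrative):

    #include <stdint.h>

    /* s = a + b + *carry; *carry becomes the outgoing carry bit. */
    static uint64_t add_with_carry(uint64_t a, uint64_t b, uint64_t *carry)
    {
        uint64_t s = a + b;
        uint64_t c = s < a;   /* daddu; sltu AT,s,a */
        s += *carry;
        c += s < *carry;      /* second wrap; at most one can happen */
        *carry = c;
        return s;
    }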
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips3.s b/deps/openssl/openssl/crypto/bn/asm/mips3.s
deleted file mode 100644
index dca4105c7d..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/mips3.s
+++ /dev/null
@@ -1,2201 +0,0 @@
-.rdata
-.asciiz "mips3.s, Version 1.1"
-.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
-
-/*
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project.
- *
- * Rights for redistribution and usage in source and binary forms are
- * granted according to the OpenSSL license. Warranty of any kind is
- * disclaimed.
- * ====================================================================
- */
-
-/*
- * This is my modest contributon to the OpenSSL project (see
- * http://www.openssl.org/ for more information about it) and is
- * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
- * module. For updates see http://fy.chalmers.se/~appro/hpe/.
- *
- * The module is designed to work with either of the "new" MIPS ABI(5),
- * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
- * IRIX 5.x not only because it doesn't support new ABIs but also
- * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
- * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
- * cause illegal instruction exception:-(
- *
- * In addition the code depends on preprocessor flags set up by MIPSpro
- * compiler driver (either as or cc) and therefore (probably?) can't be
- * compiled by the GNU assembler. GNU C driver manages fine though...
- * I mean as long as -mmips-as is specified or is the default option,
- * because then it simply invokes /usr/bin/as which in turn takes
- * perfect care of the preprocessor definitions. Another neat feature
- * offered by the MIPSpro assembler is an optimization pass. This gave
- * me the opportunity to have the code looking more regular as all those
- * architecture dependent instruction rescheduling details were left to
- * the assembler. Cool, huh?
- *
- * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
- * goes way over 3 times faster!
- *
- * <appro@fy.chalmers.se>
- */
-#include <asm.h>
-#include <regdef.h>
-
-#if _MIPS_ISA>=4
-#define MOVNZ(cond,dst,src) \
- movn dst,src,cond
-#else
-#define MOVNZ(cond,dst,src) \
- .set noreorder; \
- bnezl cond,.+8; \
- move dst,src; \
- .set reorder
-#endif
-
-.text
-
-.set noat
-.set reorder
-
-#define MINUS4 v1
-
-.align 5
-LEAF(bn_mul_add_words)
- .set noreorder
- bgtzl a2,.L_bn_mul_add_words_proceed
- ld t0,0(a1)
- jr ra
- move v0,zero
- .set reorder
-
-.L_bn_mul_add_words_proceed:
- li MINUS4,-4
- and ta0,a2,MINUS4
- move v0,zero
- beqz ta0,.L_bn_mul_add_words_tail
-
-.L_bn_mul_add_words_loop:
- dmultu t0,a3
- ld t1,0(a0)
- ld t2,8(a1)
- ld t3,8(a0)
- ld ta0,16(a1)
- ld ta1,16(a0)
- daddu t1,v0
- sltu v0,t1,v0 /* All manuals say it "compares 32-bit
- * values", but it seems to work fine
- * even on 64-bit registers. */
- mflo AT
- mfhi t0
- daddu t1,AT
- daddu v0,t0
- sltu AT,t1,AT
- sd t1,0(a0)
- daddu v0,AT
-
- dmultu t2,a3
- ld ta2,24(a1)
- ld ta3,24(a0)
- daddu t3,v0
- sltu v0,t3,v0
- mflo AT
- mfhi t2
- daddu t3,AT
- daddu v0,t2
- sltu AT,t3,AT
- sd t3,8(a0)
- daddu v0,AT
-
- dmultu ta0,a3
- subu a2,4
- PTR_ADD a0,32
- PTR_ADD a1,32
- daddu ta1,v0
- sltu v0,ta1,v0
- mflo AT
- mfhi ta0
- daddu ta1,AT
- daddu v0,ta0
- sltu AT,ta1,AT
- sd ta1,-16(a0)
- daddu v0,AT
-
-
- dmultu ta2,a3
- and ta0,a2,MINUS4
- daddu ta3,v0
- sltu v0,ta3,v0
- mflo AT
- mfhi ta2
- daddu ta3,AT
- daddu v0,ta2
- sltu AT,ta3,AT
- sd ta3,-8(a0)
- daddu v0,AT
- .set noreorder
- bgtzl ta0,.L_bn_mul_add_words_loop
- ld t0,0(a1)
-
- bnezl a2,.L_bn_mul_add_words_tail
- ld t0,0(a1)
- .set reorder
-
-.L_bn_mul_add_words_return:
- jr ra
-
-.L_bn_mul_add_words_tail:
- dmultu t0,a3
- ld t1,0(a0)
- subu a2,1
- daddu t1,v0
- sltu v0,t1,v0
- mflo AT
- mfhi t0
- daddu t1,AT
- daddu v0,t0
- sltu AT,t1,AT
- sd t1,0(a0)
- daddu v0,AT
- beqz a2,.L_bn_mul_add_words_return
-
- ld t0,8(a1)
- dmultu t0,a3
- ld t1,8(a0)
- subu a2,1
- daddu t1,v0
- sltu v0,t1,v0
- mflo AT
- mfhi t0
- daddu t1,AT
- daddu v0,t0
- sltu AT,t1,AT
- sd t1,8(a0)
- daddu v0,AT
- beqz a2,.L_bn_mul_add_words_return
-
- ld t0,16(a1)
- dmultu t0,a3
- ld t1,16(a0)
- daddu t1,v0
- sltu v0,t1,v0
- mflo AT
- mfhi t0
- daddu t1,AT
- daddu v0,t0
- sltu AT,t1,AT
- sd t1,16(a0)
- daddu v0,AT
- jr ra
-END(bn_mul_add_words)
-
-.align 5
-LEAF(bn_mul_words)
- .set noreorder
- bgtzl a2,.L_bn_mul_words_proceed
- ld t0,0(a1)
- jr ra
- move v0,zero
- .set reorder
-
-.L_bn_mul_words_proceed:
- li MINUS4,-4
- and ta0,a2,MINUS4
- move v0,zero
- beqz ta0,.L_bn_mul_words_tail
-
-.L_bn_mul_words_loop:
- dmultu t0,a3
- ld t2,8(a1)
- ld ta0,16(a1)
- ld ta2,24(a1)
- mflo AT
- mfhi t0
- daddu v0,AT
- sltu t1,v0,AT
- sd v0,0(a0)
- daddu v0,t1,t0
-
- dmultu t2,a3
- subu a2,4
- PTR_ADD a0,32
- PTR_ADD a1,32
- mflo AT
- mfhi t2
- daddu v0,AT
- sltu t3,v0,AT
- sd v0,-24(a0)
- daddu v0,t3,t2
-
- dmultu ta0,a3
- mflo AT
- mfhi ta0
- daddu v0,AT
- sltu ta1,v0,AT
- sd v0,-16(a0)
- daddu v0,ta1,ta0
-
-
- dmultu ta2,a3
- and ta0,a2,MINUS4
- mflo AT
- mfhi ta2
- daddu v0,AT
- sltu ta3,v0,AT
- sd v0,-8(a0)
- daddu v0,ta3,ta2
- .set noreorder
- bgtzl ta0,.L_bn_mul_words_loop
- ld t0,0(a1)
-
- bnezl a2,.L_bn_mul_words_tail
- ld t0,0(a1)
- .set reorder
-
-.L_bn_mul_words_return:
- jr ra
-
-.L_bn_mul_words_tail:
- dmultu t0,a3
- subu a2,1
- mflo AT
- mfhi t0
- daddu v0,AT
- sltu t1,v0,AT
- sd v0,0(a0)
- daddu v0,t1,t0
- beqz a2,.L_bn_mul_words_return
-
- ld t0,8(a1)
- dmultu t0,a3
- subu a2,1
- mflo AT
- mfhi t0
- daddu v0,AT
- sltu t1,v0,AT
- sd v0,8(a0)
- daddu v0,t1,t0
- beqz a2,.L_bn_mul_words_return
-
- ld t0,16(a1)
- dmultu t0,a3
- mflo AT
- mfhi t0
- daddu v0,AT
- sltu t1,v0,AT
- sd v0,16(a0)
- daddu v0,t1,t0
- jr ra
-END(bn_mul_words)
-
-.align 5
-LEAF(bn_sqr_words)
- .set noreorder
- bgtzl a2,.L_bn_sqr_words_proceed
- ld t0,0(a1)
- jr ra
- move v0,zero
- .set reorder
-
-.L_bn_sqr_words_proceed:
- li MINUS4,-4
- and ta0,a2,MINUS4
- move v0,zero
- beqz ta0,.L_bn_sqr_words_tail
-
-.L_bn_sqr_words_loop:
- dmultu t0,t0
- ld t2,8(a1)
- ld ta0,16(a1)
- ld ta2,24(a1)
- mflo t1
- mfhi t0
- sd t1,0(a0)
- sd t0,8(a0)
-
- dmultu t2,t2
- subu a2,4
- PTR_ADD a0,64
- PTR_ADD a1,32
- mflo t3
- mfhi t2
- sd t3,-48(a0)
- sd t2,-40(a0)
-
- dmultu ta0,ta0
- mflo ta1
- mfhi ta0
- sd ta1,-32(a0)
- sd ta0,-24(a0)
-
-
- dmultu ta2,ta2
- and ta0,a2,MINUS4
- mflo ta3
- mfhi ta2
- sd ta3,-16(a0)
- sd ta2,-8(a0)
-
- .set noreorder
- bgtzl ta0,.L_bn_sqr_words_loop
- ld t0,0(a1)
-
- bnezl a2,.L_bn_sqr_words_tail
- ld t0,0(a1)
- .set reorder
-
-.L_bn_sqr_words_return:
- move v0,zero
- jr ra
-
-.L_bn_sqr_words_tail:
- dmultu t0,t0
- subu a2,1
- mflo t1
- mfhi t0
- sd t1,0(a0)
- sd t0,8(a0)
- beqz a2,.L_bn_sqr_words_return
-
- ld t0,8(a1)
- dmultu t0,t0
- subu a2,1
- mflo t1
- mfhi t0
- sd t1,16(a0)
- sd t0,24(a0)
- beqz a2,.L_bn_sqr_words_return
-
- ld t0,16(a1)
- dmultu t0,t0
- mflo t1
- mfhi t0
- sd t1,32(a0)
- sd t0,40(a0)
- jr ra
-END(bn_sqr_words)
-
-.align 5
-LEAF(bn_add_words)
- .set noreorder
- bgtzl a3,.L_bn_add_words_proceed
- ld t0,0(a1)
- jr ra
- move v0,zero
- .set reorder
-
-.L_bn_add_words_proceed:
- li MINUS4,-4
- and AT,a3,MINUS4
- move v0,zero
- beqz AT,.L_bn_add_words_tail
-
-.L_bn_add_words_loop:
- ld ta0,0(a2)
- subu a3,4
- ld t1,8(a1)
- and AT,a3,MINUS4
- ld t2,16(a1)
- PTR_ADD a2,32
- ld t3,24(a1)
- PTR_ADD a0,32
- ld ta1,-24(a2)
- PTR_ADD a1,32
- ld ta2,-16(a2)
- ld ta3,-8(a2)
- daddu ta0,t0
- sltu t8,ta0,t0
- daddu t0,ta0,v0
- sltu v0,t0,ta0
- sd t0,-32(a0)
- daddu v0,t8
-
- daddu ta1,t1
- sltu t9,ta1,t1
- daddu t1,ta1,v0
- sltu v0,t1,ta1
- sd t1,-24(a0)
- daddu v0,t9
-
- daddu ta2,t2
- sltu t8,ta2,t2
- daddu t2,ta2,v0
- sltu v0,t2,ta2
- sd t2,-16(a0)
- daddu v0,t8
-
- daddu ta3,t3
- sltu t9,ta3,t3
- daddu t3,ta3,v0
- sltu v0,t3,ta3
- sd t3,-8(a0)
- daddu v0,t9
-
- .set noreorder
- bgtzl AT,.L_bn_add_words_loop
- ld t0,0(a1)
-
- bnezl a3,.L_bn_add_words_tail
- ld t0,0(a1)
- .set reorder
-
-.L_bn_add_words_return:
- jr ra
-
-.L_bn_add_words_tail:
- ld ta0,0(a2)
- daddu ta0,t0
- subu a3,1
- sltu t8,ta0,t0
- daddu t0,ta0,v0
- sltu v0,t0,ta0
- sd t0,0(a0)
- daddu v0,t8
- beqz a3,.L_bn_add_words_return
-
- ld t1,8(a1)
- ld ta1,8(a2)
- daddu ta1,t1
- subu a3,1
- sltu t9,ta1,t1
- daddu t1,ta1,v0
- sltu v0,t1,ta1
- sd t1,8(a0)
- daddu v0,t9
- beqz a3,.L_bn_add_words_return
-
- ld t2,16(a1)
- ld ta2,16(a2)
- daddu ta2,t2
- sltu t8,ta2,t2
- daddu t2,ta2,v0
- sltu v0,t2,ta2
- sd t2,16(a0)
- daddu v0,t8
- jr ra
-END(bn_add_words)
-
-.align 5
-LEAF(bn_sub_words)
- .set noreorder
- bgtzl a3,.L_bn_sub_words_proceed
- ld t0,0(a1)
- jr ra
- move v0,zero
- .set reorder
-
-.L_bn_sub_words_proceed:
- li MINUS4,-4
- and AT,a3,MINUS4
- move v0,zero
- beqz AT,.L_bn_sub_words_tail
-
-.L_bn_sub_words_loop:
- ld ta0,0(a2)
- subu a3,4
- ld t1,8(a1)
- and AT,a3,MINUS4
- ld t2,16(a1)
- PTR_ADD a2,32
- ld t3,24(a1)
- PTR_ADD a0,32
- ld ta1,-24(a2)
- PTR_ADD a1,32
- ld ta2,-16(a2)
- ld ta3,-8(a2)
- sltu t8,t0,ta0
- dsubu t0,ta0
- dsubu ta0,t0,v0
- sd ta0,-32(a0)
- MOVNZ (t0,v0,t8)
-
- sltu t9,t1,ta1
- dsubu t1,ta1
- dsubu ta1,t1,v0
- sd ta1,-24(a0)
- MOVNZ (t1,v0,t9)
-
-
- sltu t8,t2,ta2
- dsubu t2,ta2
- dsubu ta2,t2,v0
- sd ta2,-16(a0)
- MOVNZ (t2,v0,t8)
-
- sltu t9,t3,ta3
- dsubu t3,ta3
- dsubu ta3,t3,v0
- sd ta3,-8(a0)
- MOVNZ (t3,v0,t9)
-
- .set noreorder
- bgtzl AT,.L_bn_sub_words_loop
- ld t0,0(a1)
-
- bnezl a3,.L_bn_sub_words_tail
- ld t0,0(a1)
- .set reorder
-
-.L_bn_sub_words_return:
- jr ra
-
-.L_bn_sub_words_tail:
- ld ta0,0(a2)
- subu a3,1
- sltu t8,t0,ta0
- dsubu t0,ta0
- dsubu ta0,t0,v0
- MOVNZ (t0,v0,t8)
- sd ta0,0(a0)
- beqz a3,.L_bn_sub_words_return
-
- ld t1,8(a1)
- subu a3,1
- ld ta1,8(a2)
- sltu t9,t1,ta1
- dsubu t1,ta1
- dsubu ta1,t1,v0
- MOVNZ (t1,v0,t9)
- sd ta1,8(a0)
- beqz a3,.L_bn_sub_words_return
-
- ld t2,16(a1)
- ld ta2,16(a2)
- sltu t8,t2,ta2
- dsubu t2,ta2
- dsubu ta2,t2,v0
- MOVNZ (t2,v0,t8)
- sd ta2,16(a0)
- jr ra
-END(bn_sub_words)
-
-#undef MINUS4
-
-.align 5
-LEAF(bn_div_3_words)
- .set reorder
- move a3,a0 /* we know that bn_div_words doesn't
- * touch a3, ta2, ta3 and preserves a2
- * so that we can save two arguments
- * and return address in registers
- * instead of stack:-)
- */
- ld a0,(a3)
- move ta2,a1
- ld a1,-8(a3)
- bne a0,a2,.L_bn_div_3_words_proceed
- li v0,-1
- jr ra
-.L_bn_div_3_words_proceed:
- move ta3,ra
- bal bn_div_words
- move ra,ta3
- dmultu ta2,v0
- ld t2,-16(a3)
- move ta0,zero
- mfhi t1
- mflo t0
- sltu t8,t1,v1
-.L_bn_div_3_words_inner_loop:
- bnez t8,.L_bn_div_3_words_inner_loop_done
- sgeu AT,t2,t0
- seq t9,t1,v1
- and AT,t9
- sltu t3,t0,ta2
- daddu v1,a2
- dsubu t1,t3
- dsubu t0,ta2
- sltu t8,t1,v1
- sltu ta0,v1,a2
- or t8,ta0
- .set noreorder
- beqzl AT,.L_bn_div_3_words_inner_loop
- dsubu v0,1
- .set reorder
-.L_bn_div_3_words_inner_loop_done:
- jr ra
-END(bn_div_3_words)
-
-.align 5
-LEAF(bn_div_words)
- .set noreorder
- bnezl a2,.L_bn_div_words_proceed
- move v1,zero
- jr ra
- li v0,-1 /* I'd rather signal div-by-zero
- * which can be done with 'break 7' */
-
-.L_bn_div_words_proceed:
- bltz a2,.L_bn_div_words_body
- move t9,v1
- dsll a2,1
- bgtz a2,.-4
- addu t9,1
-
- .set reorder
- negu t1,t9
- li t2,-1
- dsll t2,t1
- and t2,a0
- dsrl AT,a1,t1
- .set noreorder
- bnezl t2,.+8
- break 6 /* signal overflow */
- .set reorder
- dsll a0,t9
- dsll a1,t9
- or a0,AT
-
-#define QT ta0
-#define HH ta1
-#define DH v1
-.L_bn_div_words_body:
- dsrl DH,a2,32
- sgeu AT,a0,a2
- .set noreorder
- bnezl AT,.+8
- dsubu a0,a2
- .set reorder
-
- li QT,-1
- dsrl HH,a0,32
- dsrl QT,32 /* q=0xffffffff */
- beq DH,HH,.L_bn_div_words_skip_div1
- ddivu zero,a0,DH
- mflo QT
-.L_bn_div_words_skip_div1:
- dmultu a2,QT
- dsll t3,a0,32
- dsrl AT,a1,32
- or t3,AT
- mflo t0
- mfhi t1
-.L_bn_div_words_inner_loop1:
- sltu t2,t3,t0
- seq t8,HH,t1
- sltu AT,HH,t1
- and t2,t8
- sltu v0,t0,a2
- or AT,t2
- .set noreorder
- beqz AT,.L_bn_div_words_inner_loop1_done
- dsubu t1,v0
- dsubu t0,a2
- b .L_bn_div_words_inner_loop1
- dsubu QT,1
- .set reorder
-.L_bn_div_words_inner_loop1_done:
-
- dsll a1,32
- dsubu a0,t3,t0
- dsll v0,QT,32
-
- li QT,-1
- dsrl HH,a0,32
- dsrl QT,32 /* q=0xffffffff */
- beq DH,HH,.L_bn_div_words_skip_div2
- ddivu zero,a0,DH
- mflo QT
-.L_bn_div_words_skip_div2:
-#undef DH
- dmultu a2,QT
- dsll t3,a0,32
- dsrl AT,a1,32
- or t3,AT
- mflo t0
- mfhi t1
-.L_bn_div_words_inner_loop2:
- sltu t2,t3,t0
- seq t8,HH,t1
- sltu AT,HH,t1
- and t2,t8
- sltu v1,t0,a2
- or AT,t2
- .set noreorder
- beqz AT,.L_bn_div_words_inner_loop2_done
- dsubu t1,v1
- dsubu t0,a2
- b .L_bn_div_words_inner_loop2
- dsubu QT,1
- .set reorder
-.L_bn_div_words_inner_loop2_done:
-#undef HH
-
- dsubu a0,t3,t0
- or v0,QT
- dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
- dsrl a2,t9 /* restore a2 */
- jr ra
-#undef QT
-END(bn_div_words)
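The bn_div_words just deleted divides a two-word value by one word without a double-width divider: it normalizes the divisor so its top bit is set, then builds the quotient from two estimated 32-bit digits (ddivu on the high halves) plus correction loops. Its contract, stated as plain C on a compiler with unsigned __int128 (h < d is the caller's precondition, so the quotient fits in one word):

    #include <stdint.h>

    /* Quotient of the 128-bit value h:l divided by d, assuming h < d. */
    static uint64_t bn_div_words_ref(uint64_t h, uint64_t l, uint64_t d)
    {
        unsigned __int128 n = ((unsigned __int128)h << 64) | l;
        return (uint64_t)(n / d);
    }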
-
-#define a_0 t0
-#define a_1 t1
-#define a_2 t2
-#define a_3 t3
-#define b_0 ta0
-#define b_1 ta1
-#define b_2 ta2
-#define b_3 ta3
-
-#define a_4 s0
-#define a_5 s2
-#define a_6 s4
-#define a_7 a1 /* once we load a[7] we don't need a anymore */
-#define b_4 s1
-#define b_5 s3
-#define b_6 s5
-#define b_7 a2 /* once we load b[7] we don't need b anymore */
-
-#define t_1 t8
-#define t_2 t9
-
-#define c_1 v0
-#define c_2 v1
-#define c_3 a3
-
-#define FRAME_SIZE 48
-
-.align 5
-LEAF(bn_mul_comba8)
- .set noreorder
- PTR_SUB sp,FRAME_SIZE
- .frame sp,64,ra
- .set reorder
- ld a_0,0(a1) /* If compiled with -mips3 option on
- * R5000 box assembler barks on this
- * line with "shouldn't have mult/div
- * as last instruction in bb (R10K
- * bug)" warning. If anybody out there
- * has a clue about how to circumvent
- * this do send me a note.
- * <appro@fy.chalmers.se>
- */
- ld b_0,0(a2)
- ld a_1,8(a1)
- ld a_2,16(a1)
- ld a_3,24(a1)
- ld b_1,8(a2)
- ld b_2,16(a2)
- ld b_3,24(a2)
- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
- sd s0,0(sp)
- sd s1,8(sp)
- sd s2,16(sp)
- sd s3,24(sp)
- sd s4,32(sp)
- sd s5,40(sp)
- mflo c_1
- mfhi c_2
-
- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
- ld a_4,32(a1)
- ld a_5,40(a1)
- ld a_6,48(a1)
- ld a_7,56(a1)
- ld b_4,32(a2)
- ld b_5,40(a2)
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu c_3,t_2,AT
- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
- ld b_6,48(a2)
- ld b_7,56(a2)
- sd c_1,0(a0) /* r[0]=c1; */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- sd c_2,8(a0) /* r[1]=c2; */
-
- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,16(a0) /* r[2]=c3; */
-
- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu c_3,c_2,t_2
- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,24(a0) /* r[3]=c1; */
-
- dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,32(a0) /* r[4]=c2; */
-
- dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,40(a0) /* r[5]=c3; */
-
- dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu c_3,c_2,t_2
- dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,48(a0) /* r[6]=c1; */
-
- dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,56(a0) /* r[7]=c2; */
-
- dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,64(a0) /* r[8]=c3; */
-
- dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu c_3,c_2,t_2
- dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,72(a0) /* r[9]=c1; */
-
- dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,80(a0) /* r[10]=c2; */
-
- dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,88(a0) /* r[11]=c3; */
-
- dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu c_3,c_2,t_2
- dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,96(a0) /* r[12]=c1; */
-
- dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,104(a0) /* r[13]=c2; */
-
- dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
- ld s0,0(sp)
- ld s1,8(sp)
- ld s2,16(sp)
- ld s3,24(sp)
- ld s4,32(sp)
- ld s5,40(sp)
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sd c_3,112(a0) /* r[14]=c3; */
- sd c_1,120(a0) /* r[15]=c1; */
-
- PTR_ADD sp,FRAME_SIZE
-
- jr ra
-END(bn_mul_comba8)
-
-.align 5
-LEAF(bn_mul_comba4)
- .set reorder
- ld a_0,0(a1)
- ld b_0,0(a2)
- ld a_1,8(a1)
- ld a_2,16(a1)
- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
- ld a_3,24(a1)
- ld b_1,8(a2)
- ld b_2,16(a2)
- ld b_3,24(a2)
- mflo c_1
- mfhi c_2
- sd c_1,0(a0)
-
- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu c_3,t_2,AT
- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- sd c_2,8(a0)
-
- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,16(a0)
-
- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu c_3,c_2,t_2
- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,24(a0)
-
- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu c_1,c_3,t_2
- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,32(a0)
-
- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu c_2,c_1,t_2
- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,40(a0)
-
- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sd c_1,48(a0)
- sd c_2,56(a0)
-
- jr ra
-END(bn_mul_comba4)
-
-#undef a_4
-#undef a_5
-#undef a_6
-#undef a_7
-#define a_4 b_0
-#define a_5 b_1
-#define a_6 b_2
-#define a_7 b_3
-
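With a_4..a_7 aliased onto the b registers, the squaring routines below reuse the same triple-word accumulator, but each cross product is doubled: that is what the mul_add_c2 groups do, and the `slt ...,zero` instructions capture the top bit that each `dsll` by one shifts out. A C sketch mirroring that instruction sequence (`unsigned __int128` assumed):

```c
#include <stdint.h>

/* One squaring step: add 2*a*b into (c0,c1,c2).  The slt/dsll pairs in
 * the assembly save the top bit of each word before doubling it. */
static void mul_add_c2(uint64_t a, uint64_t b,
                       uint64_t *c0, uint64_t *c1, uint64_t *c2)
{
    unsigned __int128 t = (unsigned __int128)a * b;
    uint64_t t1 = (uint64_t)t, t2 = (uint64_t)(t >> 64);

    *c2 += t2 >> 63;               /* slt AT,t_2,zero : bit lost by dsll */
    t2 = (t2 << 1) | (t1 >> 63);   /* dsll t_2,1 ; slt a2,t_1,zero ; daddu */
    t1 <<= 1;                      /* dsll t_1,1 */

    *c0 += t1;                     /* then the usual carry chain */
    if (*c0 < t1) t2++;
    *c1 += t2;
    if (*c1 < t2) (*c2)++;
}
```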
-.align 5
-LEAF(bn_sqr_comba8)
- .set reorder
- ld a_0,0(a1)
- ld a_1,8(a1)
- ld a_2,16(a1)
- ld a_3,24(a1)
-
- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
- ld a_4,32(a1)
- ld a_5,40(a1)
- ld a_6,48(a1)
- ld a_7,56(a1)
- mflo c_1
- mfhi c_2
- sd c_1,0(a0)
-
- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu c_3,t_2,AT
- sd c_2,8(a0)
-
- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,16(a0)
-
- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt c_3,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,24(a0)
-
- dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_1,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,32(a0)
-
- dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_2,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_2,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,40(a0)
-
- dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt c_3,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,48(a0)
-
- dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_1,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_1,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_1,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,56(a0)
-
- dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_2,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_2,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,64(a0)
-
- dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt c_3,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,72(a0)
-
- dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_1,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,80(a0)
-
- dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_2,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,88(a0)
-
- dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt c_3,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,96(a0)
-
- dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,104(a0)
-
- dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sd c_3,112(a0)
- sd c_1,120(a0)
-
- jr ra
-END(bn_sqr_comba8)
-
-.align 5
-LEAF(bn_sqr_comba4)
- .set reorder
- ld a_0,0(a1)
- ld a_1,8(a1)
- ld a_2,16(a1)
- ld a_3,24(a1)
- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
- mflo c_1
- mfhi c_2
- sd c_1,0(a0)
-
- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu c_3,t_2,AT
- sd c_2,8(a0)
-
- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,16(a0)
-
- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt c_3,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
-	dmultu	a_1,a_2		/* mul_add_c2(a[1],b[2],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- slt AT,t_2,zero
- daddu c_3,AT
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sltu AT,c_2,t_2
- daddu c_3,AT
- sd c_1,24(a0)
-
- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- slt c_1,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
- mflo t_1
- mfhi t_2
- daddu c_2,t_1
- sltu AT,c_2,t_1
- daddu t_2,AT
- daddu c_3,t_2
- sltu AT,c_3,t_2
- daddu c_1,AT
- sd c_2,32(a0)
-
- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
- mflo t_1
- mfhi t_2
- slt c_2,t_2,zero
- dsll t_2,1
- slt a2,t_1,zero
- daddu t_2,a2
- dsll t_1,1
- daddu c_3,t_1
- sltu AT,c_3,t_1
- daddu t_2,AT
- daddu c_1,t_2
- sltu AT,c_1,t_2
- daddu c_2,AT
- sd c_3,40(a0)
-
- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
- mflo t_1
- mfhi t_2
- daddu c_1,t_1
- sltu AT,c_1,t_1
- daddu t_2,AT
- daddu c_2,t_2
- sd c_1,48(a0)
- sd c_2,56(a0)
-
- jr ra
-END(bn_sqr_comba4)
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
index f3b16290eb..413eac7123 100644
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
+++ b/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
@@ -1,3 +1,9 @@
+; Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License"). You may not use
+; this file except in compliance with the License. You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
;
; PA-RISC 2.0 implementation of bn_asm code, based on the
; 64-bit version of the code. This code is effectively the
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
index a99545754d..97381172e7 100644
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
+++ b/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
@@ -1,3 +1,10 @@
+; Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License"). You may not use
+; this file except in compliance with the License. You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
;
; PA-RISC 64-bit implementation of bn_asm code
;
diff --git a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
index c02ef6f014..8aa94e8511 100644
--- a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -126,7 +133,7 @@ $fp="%r3";
$hi1="%r2";
$hi0="%r1";
-$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s
+$xfer=$n0; # accommodates [-16..15] offset in fld[dw]s
$fm0="%fr4"; $fti=$fm0;
$fbi="%fr5L";
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
index 6930a3aceb..5802260ca6 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc.pl b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
index 446d8ba949..4ea534a1c7 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
@@ -1,5 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# Implemented as a Perl wrapper as we want to support several different
# architectures with single file. We pick up the target based on the
# file name we are asked to generate.
@@ -419,7 +425,7 @@ $data=<<EOF;
# r9,r10, r11 are the equivalents of c1,c2, c3.
#
# Possible optimization of loading all 8 longs of a into registers
-# doesnt provide any speedup
+# doesn't provide any speedup
#
xor r0,r0,r0 #set r0 = 0.Used in addze
@@ -1009,7 +1015,7 @@ $data=<<EOF;
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r11,r11,r8
- addze r12,r9 # since we didnt set r12 to zero before.
+ addze r12,r9 # since we didn't set r12 to zero before.
addze r10,r0
#mul_add_c(a[1],b[0],c2,c3,c1);
$LD r6,`1*$BNSZ`(r4)
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
index 595fc6d31f..1e19c958a1 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
index 2b3f8b0e21..46d746b7d0 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
##############################################################################
# #
@@ -103,7 +110,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-
$addx = ($ver>=3.03);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT = *OUT;
if ($avx>1) {{{
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
index 87ce2c34d9..6f3b664f7a 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
##############################################################################
# #
@@ -95,7 +102,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
@@ -1767,7 +1774,7 @@ ___
{ # __rsaz_512_mul
#
# input: %rsi - ap, %rbp - bp
- # ouput:
+ # output:
# clobbers: everything
my ($ap,$bp) = ("%rsi","%rbp");
$code.=<<___;
@@ -1919,7 +1926,7 @@ if ($addx) {
# __rsaz_512_mulx
#
# input: %rsi - ap, %rbp - bp
- # ouput:
+ # output:
# clobbers: everything
my ($ap,$bp,$zero) = ("%rsi","%rbp","%rdi");
$code.=<<___;
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
index 9d18d40e77..cbd16f4214 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -35,7 +42,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
index 9fd64e81ee..2205bc2ca0 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -54,7 +61,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x.S b/deps/openssl/openssl/crypto/bn/asm/s390x.S
index f5eebe413a..292a7a9998 100755..100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x.S
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x.S
@@ -1,11 +1,11 @@
.ident "s390x.S, version 1.1"
// ====================================================================
-// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-// project.
+// Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
//
-// Rights for redistribution and usage in source and binary forms are
-// granted according to the OpenSSL license. Warranty of any kind is
-// disclaimed.
+// Licensed under the OpenSSL license (the "License"). You may not use
+// this file except in compliance with the License. You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
// ====================================================================
.text
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
index 71b45002a4..4faf66f10a 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
@@ -76,6 +83,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";
+$output = pop;
+open STDOUT,">$output";
+
$code.=<<___;
#include "sparc_arch.h"
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
index 88c5dc480a..9c31073b24 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
@@ -3,12 +3,12 @@
/*
* ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project.
+ * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
*
- * Rights for redistribution and usage in source and binary forms are
- * granted according to the OpenSSL license. Warranty of any kind is
- * disclaimed.
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
* ====================================================================
*/
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
index 63de1860f2..714a136675 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
@@ -3,12 +3,12 @@
/*
* ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project.
+ * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
*
- * Rights for redistribution and usage in source and binary forms are
- * granted according to the OpenSSL license. Warranty of any kind is
- * disclaimed.
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
* ====================================================================
*/
@@ -52,7 +52,7 @@
* # cd ../..
* # make; make test
*
- * Q. V8plus achitecture? What kind of beast is that?
+ * Q. V8plus architecture? What kind of beast is that?
* A. Well, it's rather a programming model than an architecture...
* It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
* special conditions, namely when kernel doesn't preserve upper
@@ -71,7 +71,7 @@
*
* Q. 64-bit registers under 32-bit kernels? Didn't you just say it
* doesn't work?
- * A. You can't adress *all* registers as 64-bit wide:-( The catch is
+ * A. You can't address *all* registers as 64-bit wide:-( The catch is
* that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
* preserved if you're in a leaf function, i.e. such never calling
* any other functions. All functions in this module are leaf and
@@ -144,6 +144,10 @@
* }
*/
+#ifdef OPENSSL_FIPSCANISTER
+#include <openssl/fipssyms.h>
+#endif
+
#if defined(__SUNPRO_C) && defined(__sparcv9)
/* They've said -xarch=v9 at command line */
.register %g2,#scratch
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl
index ab94cd917c..dcf11a87a1 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -18,6 +25,9 @@
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.
+$output = pop;
+open STDOUT,">$output";
+
$locals=16*8;
$tab="%l0";
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
index d866287800..6807c8b6e0 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -13,7 +20,7 @@
# for undertaken effort are multiple. First of all, UltraSPARC is not
# the whole SPARCv9 universe and other VIS-free implementations deserve
# optimized code as much. Secondly, newly introduced UltraSPARC T1,
-# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,
+# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive paths,
# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
# several integrated RSA/DSA accelerator circuits accessible through
# kernel driver [only(*)], but having decent user-land software
@@ -23,7 +30,7 @@
# instructions...
# (*) Engine accessing the driver in question is on my TODO list.
-# For reference, acceleator is estimated to give 6 to 10 times
+# For reference, accelerator is estimated to give 6 to 10 times
# improvement on single-threaded RSA sign. It should be noted
# that 6-10x improvement coefficient does not actually mean
# something extraordinary in terms of absolute [single-threaded]
@@ -42,6 +49,9 @@
# module still have hidden potential [see TODO list there], which is
# estimated to be larger than 20%...
+$output = pop;
+open STDOUT,">$output";
+
# int bn_mul_mont(
$rp="%i0"; # BN_ULONG *rp,
$ap="%i1"; # const BN_ULONG *ap,
@@ -50,10 +60,8 @@ $np="%i3"; # const BN_ULONG *np,
$n0="%i4"; # const BN_ULONG *n0,
$num="%i5"; # int num);
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=128; }
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
$car0="%o0";
$car1="%o1";
@@ -76,6 +84,8 @@ $tpj="%l7";
$fname="bn_mul_mont_int";
$code=<<___;
+#include "sparc_arch.h"
+
.section ".text",#alloc,#execinstr
.global $fname
@@ -105,7 +115,7 @@ $fname:
ld [$np],$car1 ! np[0]
sub %o7,$bias,%sp ! alloca
ld [$np+4],$npj ! np[1]
- be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
+ be,pt SIZE_T_CC,.Lbn_sqr_mont
mov 12,$j
mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
index a14205f2f0..50b690653f 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -51,21 +58,17 @@
#
# Modulo-scheduled inner loops allow to interleave floating point and
# integer instructions and minimize Read-After-Write penalties. This
-# results in *further* 20-50% perfromance improvement [depending on
+# results in *further* 20-50% performance improvement [depending on
# key length, more for longer keys] on USI&II cores and 30-80% - on
# USIII&IV.
+$output = pop;
+open STDOUT,">$output";
+
$fname="bn_mul_mont_fpu";
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-
-if ($bits==64) {
- $bias=2047;
- $frame=192;
-} else {
- $bias=0;
- $frame=128; # 96 rounded up to largest known cache-line
-}
+
+$frame="STACK_FRAME";
+$bias="STACK_BIAS";
$locals=64;
# In order to provide for 32-/64-bit ABI duality, I keep integers wider
@@ -121,6 +124,8 @@ $nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load
$code=<<___;
+#include "sparc_arch.h"
+
.section ".text",#alloc,#execinstr
.global $fname
@@ -867,7 +872,7 @@ ___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
# Below substitution makes it possible to compile without demanding
-# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
+# VIS extensions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
# dare to do this, because VIS capability is detected at run-time now
# and this routine is not called on CPU not capable to execute it. Do
# note that fzeros is not the only VIS dependency! Another dependency
diff --git a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
index c046a514c8..9f81bc822e 100644
--- a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -81,6 +88,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output = pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],"via-mont.pl");
# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
@@ -240,3 +250,5 @@ $sp=&DWP(28,"esp");
&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
index 263ac02b6f..64dba4480f 100644
--- a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -18,16 +25,20 @@
# for reference purposes, because T4 has dedicated Montgomery
# multiplication and squaring *instructions* that deliver even more.
-$bits=32;
-for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else { $bias=0; $frame=112; }
+$output = pop;
+open STDOUT,">$output";
+
+$frame = "STACK_FRAME";
+$bias = "STACK_BIAS";
+
+$code.=<<___;
+#include "sparc_arch.h"
-$code.=<<___ if ($bits==64);
+#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
-___
-$code.=<<___;
+#endif
+
.section ".text",#alloc,#execinstr
___
@@ -333,7 +344,7 @@ ___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis3 {
diff --git a/deps/openssl/openssl/crypto/bn/asm/vms.mar b/deps/openssl/openssl/crypto/bn/asm/vms.mar
deleted file mode 100644
index aefab15cdb..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/vms.mar
+++ /dev/null
@@ -1,6440 +0,0 @@
- .title vax_bn_mul_add_words unsigned multiply & add, 32*32+32+32=>64
-;
-; w.j.m. 15-jan-1999
-;
-; it's magic ...
-;
-; ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
-; ULONG c = 0;
-; int i;
-; for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
-; return c;
-; }
-
-r=4 ;(AP)
-a=8 ;(AP)
-n=12 ;(AP) n by value (input)
-w=16 ;(AP) w by value (input)
-
-
- .psect code,nowrt
-
-.entry bn_mul_add_words,^m<r2,r3,r4,r5,r6>
-
- moval @r(ap),r2
- moval @a(ap),r3
- movl n(ap),r4 ; assumed >0 by C code
- movl w(ap),r5
- clrl r6 ; c
-
-0$:
- emul r5,(r3),(r2),r0 ; w, a[], r[] considered signed
-
- ; fixup for "negative" r[]
- tstl (r2)
- bgeq 10$
- incl r1
-10$:
-
- ; add in c
- addl2 r6,r0
- adwc #0,r1
-
- ; combined fixup for "negative" w, a[]
- tstl r5
- bgeq 20$
- addl2 (r3),r1
-20$:
- tstl (r3)
- bgeq 30$
- addl2 r5,r1
-30$:
-
- movl r0,(r2)+ ; store lo result in r[] & advance
- addl #4,r3 ; advance a[]
- movl r1,r6 ; store hi result => c
-
- sobgtr r4,0$
-
- movl r6,r0 ; return c
- ret
-
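EMUL is a signed multiply, so bn_mul_add_words (and the routines that follow) must patch the result back to an unsigned product: for each operand whose top bit is set, the other operand is added into the high word, which is exactly what the "fixup for negative ..." blocks do. The identity, sketched in C with an illustrative helper name:

```c
#include <stdint.h>

/* Recover an unsigned 32x32 product from a signed multiply, the way the
 * post-EMUL fixups do: for each operand with its top bit set, add the
 * other operand into the high word of the result. */
static uint64_t umul_from_signed(uint32_t a, uint32_t w)
{
    uint64_t p = (uint64_t)((int64_t)(int32_t)a * (int32_t)w);
    if ((int32_t)w < 0) p += (uint64_t)a << 32;
    if ((int32_t)a < 0) p += (uint64_t)w << 32;
    return p;   /* == (uint64_t)a * w */
}
```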
- .title vax_bn_mul_words unsigned multiply & add, 32*32+32=>64
-;
-; w.j.m. 15-jan-1999
-;
-; it's magic ...
-;
-; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
-; ULONG c = 0;
-; int i;
-; for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
-; return(c);
-; }
-
-r=4 ;(AP)
-a=8 ;(AP)
-n=12 ;(AP) n by value (input)
-w=16 ;(AP) w by value (input)
-
-
- .psect code,nowrt
-
-.entry bn_mul_words,^m<r2,r3,r4,r5,r6>
-
- moval @r(ap),r2 ; r2 -> r[]
- moval @a(ap),r3 ; r3 -> a[]
- movl n(ap),r4 ; r4 = loop count (assumed >0 by C code)
- movl w(ap),r5 ; r5 = w
- clrl r6 ; r6 = c
-
-0$:
- ; <r1,r0> := w * a[] + c
- emul r5,(r3),r6,r0 ; w, a[], c considered signed
-
- ; fixup for "negative" c
- tstl r6 ; c
- bgeq 10$
- incl r1
-10$:
-
- ; combined fixup for "negative" w, a[]
- tstl r5 ; w
- bgeq 20$
- addl2 (r3),r1 ; a[]
-20$:
- tstl (r3) ; a[]
- bgeq 30$
- addl2 r5,r1 ; w
-30$:
-
- movl r0,(r2)+ ; store lo result in r[] & advance
- addl #4,r3 ; advance a[]
- movl r1,r6 ; store hi result => c
-
- sobgtr r4,0$
-
- movl r6,r0 ; return c
- ret
-
- .title vax_bn_sqr_words unsigned square, 32*32=>64
-;
-; w.j.m. 15-jan-1999
-;
-; it's magic ...
-;
-; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
-; int i;
-; for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
-; }
-
-r=4 ;(AP)
-a=8 ;(AP)
-n=12 ;(AP) n by value (input)
-
-
- .psect code,nowrt
-
-.entry bn_sqr_words,^m<r2,r3,r4,r5>
-
- moval @r(ap),r2 ; r2 -> r[]
- moval @a(ap),r3 ; r3 -> a[]
- movl n(ap),r4 ; r4 = n (assumed >0 by C code)
-
-0$:
- movl (r3)+,r5 ; r5 = a[] & advance
-
- ; <r1,r0> := a[] * a[]
- emul r5,r5,#0,r0 ; a[] considered signed
-
- ; fixup for "negative" a[]
- tstl r5 ; a[]
- bgeq 30$
- addl2 r5,r1 ; a[]
- addl2 r5,r1 ; a[]
-30$:
-
- movl r0,(r2)+ ; store lo result in r[] & advance
- movl r1,(r2)+ ; store hi result in r[] & advance
-
- sobgtr r4,0$
-
- movl #1,r0 ; return SS$_NORMAL
- ret
-
- .title vax_bn_div_words unsigned divide
-;
-; Richard Levitte 20-Nov-2000
-;
-; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
-; {
-; return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
-; }
-;
-; Using EDIV would be very easy, if it didn't do signed calculations.
-; Any time any of the input numbers are signed, there are problems,
-; usually with integer overflow, at which point it returns useless
-; data (the quotient gets the value of l, and the remainder becomes 0).
-;
-; If it was just for the dividend, it would be very easy, just divide
-; it by 2 (unsigned), do the division, multiply the resulting quotient
-; and remainder by 2, add the bit that was dropped when dividing by 2
-; to the remainder, and do some adjustment so the remainder doesn't
-; end up larger than the divisor. For some cases when the divisor is
-; negative (from EDIV's point of view, i.e. when the highest bit is set),
-; dividing the dividend by 2 isn't enough, and since some operations
-; might generate integer overflows even when the dividend is divided by
-; 4 (when the high part of the shifted down dividend ends up being exactly
-; half of the divisor, the result is the quotient 0x80000000, which is
-; negative...) it needs to be divided by 8. Furthermore, the divisor needs
-; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
-; In this case, a little extra fiddling with the remainder is required.
-;
-; So, the simplest way to handle this is always to divide the dividend
-; by 8, and to divide the divisor by 2 if its highest bit is set.
-; After EDIV has been used, the quotient gets multiplied by 8 if the
-; original divisor was positive, otherwise 4. The remainder, oddly
-; enough, is *always* multiplied by 8.
-; NOTE: in the case mentioned above, where the high part of the shifted
-; down dividend ends up being exactly half the shifted down divisor, we
-; end up with a 33-bit quotient. That's no problem, however; it usually
-; means we have ended up with too large a remainder as well, and the
-; problem is fixed by the last part of the algorithm (next paragraph).
-;
-; The routine ends by comparing the resulting remainder with the
-; original divisor; if the remainder is larger, the original divisor
-; is subtracted from it and the quotient is increased by 1. This is
-; repeated until the remainder is smaller than the divisor.
-;
-; The complete algorithm looks like this:
-;
-; d' = d
-; l' = l & 7
-; [h,l] = [h,l] >> 3
-; [q,r] = floor([h,l] / d) # This is the EDIV operation
-; if (q < 0) q = -q # I doubt this is necessary any more
-;
-; r' = r >> 29
-; if (d' >= 0)
-; q' = q >> 29
-; q = q << 3
-; else
-; q' = q >> 30
-; q = q << 2
-; r = (r << 3) + l'
-;
-; if (d' < 0)
-; {
-; [r',r] = [r',r] - q
-; while ([r',r] < 0)
-; {
-; [r',r] = [r',r] + d
-; [q',q] = [q',q] - 1
-; }
-; }
-;
-; while ([r',r] >= d')
-; {
-; [r',r] = [r',r] - d'
-; [q',q] = [q',q] + 1
-; }
-;
-; return q
-
-h=4 ;(AP) h by value (input)
-l=8 ;(AP) l by value (input)
-d=12 ;(AP) d by value (input)
-
-;r2 = l, q
-;r3 = h, r
-;r4 = d
-;r5 = l'
-;r6 = r'
-;r7 = d'
-;r8 = q'
-
- .psect code,nowrt
-
-.entry bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8>
- movl l(ap),r2
- movl h(ap),r3
- movl d(ap),r4
-
- bicl3 #^XFFFFFFF8,r2,r5 ; l' = l & 7
- bicl3 #^X00000007,r2,r2
-
- bicl3 #^XFFFFFFF8,r3,r6
- bicl3 #^X00000007,r3,r3
-
- addl r6,r2
-
- rotl #-3,r2,r2 ; l = l >> 3
- rotl #-3,r3,r3 ; h = h >> 3
-
- movl r4,r7 ; d' = d
-
- movl #0,r6 ; r' = 0
- movl #0,r8 ; q' = 0
-
- tstl r4
- beql 666$ ; Uh-oh, the divisor is 0...
- bgtr 1$
- rotl #-1,r4,r4 ; If d is negative, shift it right.
- bicl2 #^X80000000,r4 ; Since d is then a large number, the
- ; lowest bit is insignificant
- ; (contradict that, and I'll fix the problem!)
-1$:
- ediv r4,r2,r2,r3 ; Do the actual division
-
- tstl r2
- bgeq 3$
- mnegl r2,r2 ; if q < 0, negate it
-3$:
- tstl r7
- blss 4$
- rotl #3,r2,r2 ; q = q << 3
- bicl3 #^XFFFFFFF8,r2,r8 ; q' gets the high bits from q
- bicl3 #^X00000007,r2,r2
- bsb 41$
-4$: ; else
- rotl #2,r2,r2 ; q = q << 2
- bicl3 #^XFFFFFFFC,r2,r8 ; q' gets the high bits from q
- bicl3 #^X00000003,r2,r2
-41$:
- rotl #3,r3,r3 ; r = r << 3
- bicl3 #^XFFFFFFF8,r3,r6 ; r' gets the high bits from r
- bicl3 #^X00000007,r3,r3
- addl r5,r3 ; r = r + l'
-
- tstl r7
- bgeq 5$
- bitl #1,r7
- beql 5$ ; if d' < 0 && d' & 1
- subl r2,r3 ; [r',r] = [r',r] - [q',q]
- sbwc r8,r6
-45$:
- bgeq 5$ ; while r < 0
- decl r2 ; [q',q] = [q',q] - 1
- sbwc #0,r8
- addl r7,r3 ; [r',r] = [r',r] + d'
- adwc #0,r6
- brb 45$
-
-; The return points are placed in the middle to keep a short distance from
-; all the branch points
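-; (Conditional branches such as BGEQ and BLSSU only have a signed byte
-; displacement on VAX, so their targets must lie within roughly 127
-; bytes of the branch instruction itself.)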
-42$:
-; movl r3,r1
- movl r2,r0
- ret
-666$:
- movl #^XFFFFFFFF,r0
- ret
-
-5$:
- tstl r6
- bneq 6$
- cmpl r3,r7
- blssu 42$ ; while [r',r] >= d'
-6$:
- subl r7,r3 ; [r',r] = [r',r] - d'
- sbwc #0,r6
- incl r2 ; [q',q] = [q',q] + 1
- adwc #0,r8
- brb 5$
-
- .title vax_bn_add_words unsigned add of two arrays
-;
-; Richard Levitte 20-Nov-2000
-;
-; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
-; ULONG c = 0;
-; int i;
-; for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
-; return(c);
-; }
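-;
-; Note that in the loop below, c lives in the PSL carry bit rather than
-; in a register: ADWC both consumes and sets it, MOVL and SOBGTR leave
-; it alone, and the final ADWC #0,r0 folds it into the return value.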
-
-r=4 ;(AP) r by reference (output)
-a=8 ;(AP) a by reference (input)
-b=12 ;(AP) b by reference (input)
-n=16 ;(AP) n by value (input)
-
-
- .psect code,nowrt
-
-.entry bn_add_words,^m<r2,r3,r4,r5,r6>
-
- moval @r(ap),r2
- moval @a(ap),r3
- moval @b(ap),r4
- movl n(ap),r5 ; assumed >0 by C code
- clrl r0 ; c
-
- tstl r5 ; carry = 0
- bleq 666$
-
-0$:
- movl (r3)+,r6 ; carry untouched
- adwc (r4)+,r6 ; carry used and touched
- movl r6,(r2)+ ; carry untouched
- sobgtr r5,0$ ; carry untouched
-
- adwc #0,r0
-666$:
- ret
-
-	.title	vax_bn_sub_words  unsigned subtract of two arrays
-;
-; Richard Levitte 20-Nov-2000
-;
-; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
-; ULONG c = 0;
-; int i;
-; for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
-; return(c);
-; }
-
-r=4 ;(AP) r by reference (output)
-a=8 ;(AP) a by reference (input)
-b=12 ;(AP) b by reference (input)
-n=16 ;(AP) n by value (input)
-
-
- .psect code,nowrt
-
-.entry bn_sub_words,^m<r2,r3,r4,r5,r6>
-
- moval @r(ap),r2
- moval @a(ap),r3
- moval @b(ap),r4
- movl n(ap),r5 ; assumed >0 by C code
- clrl r0 ; c
-
- tstl r5 ; carry = 0
- bleq 666$
-
-0$:
- movl (r3)+,r6 ; carry untouched
- sbwc (r4)+,r6 ; carry used and touched
- movl r6,(r2)+ ; carry untouched
- sobgtr r5,0$ ; carry untouched
-
- adwc #0,r0
-666$:
- ret
-
-
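-; This and BN_MUL_COMBA4 below look like compiler output rather than
-; hand-written code. The corresponding C routine in OpenSSL is
-; bn_mul_comba8, which computes the full 16-word product of two 8-word
-; arrays:
-;
-; void bn_mul_comba8(ULONG r[16], ULONG a[8], ULONG b[8]);
-;
-; Each repeated block below builds one 32x32->64 partial product
-; a[i]*b[j] from four 16x16 multiplies (MULL only keeps the low 32 bits
-; of a product) and adds it into a three-word carry accumulator that
-; rotates through r8, r9 and r10; one word of the result is stored
-; through r11 (the r argument) after each completed column.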
-;r=4 ;(AP)
-;a=8 ;(AP)
-;b=12 ;(AP)
-;n=16 ;(AP) n by value (input)
-
- .psect code,nowrt
-
-.entry BN_MUL_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
- movab -924(sp),sp
- clrq r8
-
- clrl r10
-
- movl 8(ap),r6
- movzwl 2(r6),r3
- movl 12(ap),r7
- bicl3 #-65536,(r7),r2
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-12(fp)
- bicl3 #-65536,r3,-16(fp)
- mull3 r0,-12(fp),-4(fp)
- mull2 r2,-12(fp)
- mull3 r2,-16(fp),-8(fp)
- mull2 r0,-16(fp)
- addl3 -4(fp),-8(fp),r0
- bicl3 #0,r0,-4(fp)
- cmpl -4(fp),-8(fp)
- bgequ noname.45
- addl2 #65536,-16(fp)
-noname.45:
- movzwl -2(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-16(fp)
- bicl3 #-65536,-4(fp),r0
- ashl #16,r0,-8(fp)
- addl3 -8(fp),-12(fp),r0
- bicl3 #0,r0,-12(fp)
- cmpl -12(fp),-8(fp)
- bgequ noname.46
- incl -16(fp)
-noname.46:
- movl -12(fp),r1
- movl -16(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.47
- incl r2
-noname.47:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.48
- incl r10
-noname.48:
-
- movl 4(ap),r11
- movl r9,(r11)
-
- clrl r9
-
- movzwl 2(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-28(fp)
- bicl3 #-65536,r2,-32(fp)
- mull3 r0,-28(fp),-20(fp)
- mull2 r3,-28(fp)
- mull3 r3,-32(fp),-24(fp)
- mull2 r0,-32(fp)
- addl3 -20(fp),-24(fp),r0
- bicl3 #0,r0,-20(fp)
- cmpl -20(fp),-24(fp)
- bgequ noname.49
- addl2 #65536,-32(fp)
-noname.49:
- movzwl -18(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-32(fp)
- bicl3 #-65536,-20(fp),r0
- ashl #16,r0,-24(fp)
- addl3 -24(fp),-28(fp),r0
- bicl3 #0,r0,-28(fp)
- cmpl -28(fp),-24(fp)
- bgequ noname.50
- incl -32(fp)
-noname.50:
- movl -28(fp),r1
- movl -32(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.51
- incl r2
-noname.51:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.52
- incl r9
-noname.52:
-
- movzwl 6(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-44(fp)
- bicl3 #-65536,r2,-48(fp)
- mull3 r0,-44(fp),-36(fp)
- mull2 r3,-44(fp)
- mull3 r3,-48(fp),-40(fp)
- mull2 r0,-48(fp)
- addl3 -36(fp),-40(fp),r0
- bicl3 #0,r0,-36(fp)
- cmpl -36(fp),-40(fp)
- bgequ noname.53
- addl2 #65536,-48(fp)
-noname.53:
- movzwl -34(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-48(fp)
- bicl3 #-65536,-36(fp),r0
- ashl #16,r0,-40(fp)
- addl3 -40(fp),-44(fp),r0
- bicl3 #0,r0,-44(fp)
- cmpl -44(fp),-40(fp)
- bgequ noname.54
- incl -48(fp)
-noname.54:
- movl -44(fp),r1
- movl -48(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.55
- incl r2
-noname.55:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.56
- incl r9
-noname.56:
-
- movl r8,4(r11)
-
- clrl r8
-
- movzwl 10(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-60(fp)
- bicl3 #-65536,r2,-64(fp)
- mull3 r0,-60(fp),-52(fp)
- mull2 r3,-60(fp)
- mull3 r3,-64(fp),-56(fp)
- mull2 r0,-64(fp)
- addl3 -52(fp),-56(fp),r0
- bicl3 #0,r0,-52(fp)
- cmpl -52(fp),-56(fp)
- bgequ noname.57
- addl2 #65536,-64(fp)
-noname.57:
- movzwl -50(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-64(fp)
- bicl3 #-65536,-52(fp),r0
- ashl #16,r0,-56(fp)
- addl3 -56(fp),-60(fp),r0
- bicl3 #0,r0,-60(fp)
- cmpl -60(fp),-56(fp)
- bgequ noname.58
- incl -64(fp)
-noname.58:
- movl -60(fp),r1
- movl -64(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.59
- incl r2
-noname.59:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.60
- incl r8
-noname.60:
-
- movzwl 6(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-76(fp)
- bicl3 #-65536,r2,-80(fp)
- mull3 r0,-76(fp),-68(fp)
- mull2 r3,-76(fp)
- mull3 r3,-80(fp),-72(fp)
- mull2 r0,-80(fp)
- addl3 -68(fp),-72(fp),r0
- bicl3 #0,r0,-68(fp)
- cmpl -68(fp),-72(fp)
- bgequ noname.61
- addl2 #65536,-80(fp)
-noname.61:
- movzwl -66(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-80(fp)
- bicl3 #-65536,-68(fp),r0
- ashl #16,r0,-72(fp)
- addl3 -72(fp),-76(fp),r0
- bicl3 #0,r0,-76(fp)
- cmpl -76(fp),-72(fp)
- bgequ noname.62
- incl -80(fp)
-noname.62:
- movl -76(fp),r1
- movl -80(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.63
- incl r2
-noname.63:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.64
- incl r8
-noname.64:
-
- movzwl 2(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-92(fp)
- bicl3 #-65536,r2,-96(fp)
- mull3 r0,-92(fp),-84(fp)
- mull2 r3,-92(fp)
- mull3 r3,-96(fp),-88(fp)
- mull2 r0,-96(fp)
- addl3 -84(fp),-88(fp),r0
- bicl3 #0,r0,-84(fp)
- cmpl -84(fp),-88(fp)
- bgequ noname.65
- addl2 #65536,-96(fp)
-noname.65:
- movzwl -82(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-96(fp)
- bicl3 #-65536,-84(fp),r0
- ashl #16,r0,-88(fp)
- addl3 -88(fp),-92(fp),r0
- bicl3 #0,r0,-92(fp)
- cmpl -92(fp),-88(fp)
- bgequ noname.66
- incl -96(fp)
-noname.66:
- movl -92(fp),r1
- movl -96(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.67
- incl r2
-noname.67:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.68
- incl r8
-noname.68:
-
- movl r10,8(r11)
-
- clrl r10
-
- movzwl 2(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-108(fp)
- bicl3 #-65536,r2,-112(fp)
- mull3 r0,-108(fp),-100(fp)
- mull2 r3,-108(fp)
- mull3 r3,-112(fp),-104(fp)
- mull2 r0,-112(fp)
- addl3 -100(fp),-104(fp),r0
- bicl3 #0,r0,-100(fp)
- cmpl -100(fp),-104(fp)
- bgequ noname.69
- addl2 #65536,-112(fp)
-noname.69:
- movzwl -98(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-112(fp)
- bicl3 #-65536,-100(fp),r0
- ashl #16,r0,-104(fp)
- addl3 -104(fp),-108(fp),r0
- bicl3 #0,r0,-108(fp)
- cmpl -108(fp),-104(fp)
- bgequ noname.70
- incl -112(fp)
-noname.70:
- movl -108(fp),r1
- movl -112(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.71
- incl r2
-noname.71:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.72
- incl r10
-noname.72:
-
- movzwl 6(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-124(fp)
- bicl3 #-65536,r2,-128(fp)
- mull3 r0,-124(fp),-116(fp)
- mull2 r3,-124(fp)
- mull3 r3,-128(fp),-120(fp)
- mull2 r0,-128(fp)
- addl3 -116(fp),-120(fp),r0
- bicl3 #0,r0,-116(fp)
- cmpl -116(fp),-120(fp)
- bgequ noname.73
- addl2 #65536,-128(fp)
-noname.73:
- movzwl -114(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-128(fp)
- bicl3 #-65536,-116(fp),r0
- ashl #16,r0,-120(fp)
- addl3 -120(fp),-124(fp),r0
- bicl3 #0,r0,-124(fp)
- cmpl -124(fp),-120(fp)
- bgequ noname.74
- incl -128(fp)
-noname.74:
- movl -124(fp),r1
- movl -128(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.75
- incl r2
-noname.75:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.76
- incl r10
-noname.76:
-
- movzwl 10(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-140(fp)
- bicl3 #-65536,r2,-144(fp)
- mull3 r0,-140(fp),-132(fp)
- mull2 r3,-140(fp)
- mull3 r3,-144(fp),-136(fp)
- mull2 r0,-144(fp)
- addl3 -132(fp),-136(fp),r0
- bicl3 #0,r0,-132(fp)
- cmpl -132(fp),-136(fp)
- bgequ noname.77
- addl2 #65536,-144(fp)
-noname.77:
- movzwl -130(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-144(fp)
- bicl3 #-65536,-132(fp),r0
- ashl #16,r0,-136(fp)
- addl3 -136(fp),-140(fp),r0
- bicl3 #0,r0,-140(fp)
- cmpl -140(fp),-136(fp)
- bgequ noname.78
- incl -144(fp)
-noname.78:
- movl -140(fp),r1
- movl -144(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.79
- incl r2
-noname.79:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.80
- incl r10
-noname.80:
-
- movzwl 14(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-156(fp)
- bicl3 #-65536,r2,-160(fp)
- mull3 r0,-156(fp),-148(fp)
- mull2 r3,-156(fp)
- mull3 r3,-160(fp),-152(fp)
- mull2 r0,-160(fp)
- addl3 -148(fp),-152(fp),r0
- bicl3 #0,r0,-148(fp)
- cmpl -148(fp),-152(fp)
- bgequ noname.81
- addl2 #65536,-160(fp)
-noname.81:
- movzwl -146(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-160(fp)
- bicl3 #-65536,-148(fp),r0
- ashl #16,r0,-152(fp)
- addl3 -152(fp),-156(fp),r0
- bicl3 #0,r0,-156(fp)
- cmpl -156(fp),-152(fp)
- bgequ noname.82
- incl -160(fp)
-noname.82:
- movl -156(fp),r1
- movl -160(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.83
- incl r2
-noname.83:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.84
- incl r10
-noname.84:
-
- movl r9,12(r11)
-
- clrl r9
-
- movzwl 18(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-172(fp)
- bicl3 #-65536,r2,-176(fp)
- mull3 r0,-172(fp),-164(fp)
- mull2 r3,-172(fp)
- mull3 r3,-176(fp),-168(fp)
- mull2 r0,-176(fp)
- addl3 -164(fp),-168(fp),r0
- bicl3 #0,r0,-164(fp)
- cmpl -164(fp),-168(fp)
- bgequ noname.85
- addl2 #65536,-176(fp)
-noname.85:
- movzwl -162(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-176(fp)
- bicl3 #-65536,-164(fp),r0
- ashl #16,r0,-168(fp)
- addl3 -168(fp),-172(fp),r0
- bicl3 #0,r0,-172(fp)
- cmpl -172(fp),-168(fp)
- bgequ noname.86
- incl -176(fp)
-noname.86:
- movl -172(fp),r1
- movl -176(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.87
- incl r2
-noname.87:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.88
- incl r9
-noname.88:
-
- movzwl 14(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-188(fp)
- bicl3 #-65536,r2,-192(fp)
- mull3 r0,-188(fp),-180(fp)
- mull2 r3,-188(fp)
- mull3 r3,-192(fp),-184(fp)
- mull2 r0,-192(fp)
- addl3 -180(fp),-184(fp),r0
- bicl3 #0,r0,-180(fp)
- cmpl -180(fp),-184(fp)
- bgequ noname.89
- addl2 #65536,-192(fp)
-noname.89:
- movzwl -178(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-192(fp)
- bicl3 #-65536,-180(fp),r0
- ashl #16,r0,-184(fp)
- addl3 -184(fp),-188(fp),r0
- bicl3 #0,r0,-188(fp)
- cmpl -188(fp),-184(fp)
- bgequ noname.90
- incl -192(fp)
-noname.90:
- movl -188(fp),r1
- movl -192(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.91
- incl r2
-noname.91:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.92
- incl r9
-noname.92:
-
- movzwl 10(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-204(fp)
- bicl3 #-65536,r2,-208(fp)
- mull3 r0,-204(fp),-196(fp)
- mull2 r3,-204(fp)
- mull3 r3,-208(fp),-200(fp)
- mull2 r0,-208(fp)
- addl3 -196(fp),-200(fp),r0
- bicl3 #0,r0,-196(fp)
- cmpl -196(fp),-200(fp)
- bgequ noname.93
- addl2 #65536,-208(fp)
-noname.93:
- movzwl -194(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-208(fp)
- bicl3 #-65536,-196(fp),r0
- ashl #16,r0,-200(fp)
- addl3 -200(fp),-204(fp),r0
- bicl3 #0,r0,-204(fp)
- cmpl -204(fp),-200(fp)
- bgequ noname.94
- incl -208(fp)
-noname.94:
- movl -204(fp),r1
- movl -208(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.95
- incl r2
-noname.95:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.96
- incl r9
-noname.96:
-
- movzwl 6(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-220(fp)
- bicl3 #-65536,r2,-224(fp)
- mull3 r0,-220(fp),-212(fp)
- mull2 r3,-220(fp)
- mull3 r3,-224(fp),-216(fp)
- mull2 r0,-224(fp)
- addl3 -212(fp),-216(fp),r0
- bicl3 #0,r0,-212(fp)
- cmpl -212(fp),-216(fp)
- bgequ noname.97
- addl2 #65536,-224(fp)
-noname.97:
- movzwl -210(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-224(fp)
- bicl3 #-65536,-212(fp),r0
- ashl #16,r0,-216(fp)
- addl3 -216(fp),-220(fp),r0
- bicl3 #0,r0,-220(fp)
- cmpl -220(fp),-216(fp)
- bgequ noname.98
- incl -224(fp)
-noname.98:
- movl -220(fp),r1
- movl -224(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.99
- incl r2
-noname.99:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.100
- incl r9
-noname.100:
-
- movzwl 2(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-236(fp)
- bicl3 #-65536,r2,-240(fp)
- mull3 r0,-236(fp),-228(fp)
- mull2 r3,-236(fp)
- mull3 r3,-240(fp),-232(fp)
- mull2 r0,-240(fp)
- addl3 -228(fp),-232(fp),r0
- bicl3 #0,r0,-228(fp)
- cmpl -228(fp),-232(fp)
- bgequ noname.101
- addl2 #65536,-240(fp)
-noname.101:
- movzwl -226(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-240(fp)
- bicl3 #-65536,-228(fp),r0
- ashl #16,r0,-232(fp)
- addl3 -232(fp),-236(fp),r0
- bicl3 #0,r0,-236(fp)
- cmpl -236(fp),-232(fp)
- bgequ noname.102
- incl -240(fp)
-noname.102:
- movl -236(fp),r1
- movl -240(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.103
- incl r2
-noname.103:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.104
- incl r9
-noname.104:
-
- movl r8,16(r11)
-
- clrl r8
-
- movzwl 2(r6),r2
- bicl3 #-65536,20(r7),r3
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-252(fp)
- bicl3 #-65536,r2,-256(fp)
- mull3 r0,-252(fp),-244(fp)
- mull2 r3,-252(fp)
- mull3 r3,-256(fp),-248(fp)
- mull2 r0,-256(fp)
- addl3 -244(fp),-248(fp),r0
- bicl3 #0,r0,-244(fp)
- cmpl -244(fp),-248(fp)
- bgequ noname.105
- addl2 #65536,-256(fp)
-noname.105:
- movzwl -242(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-256(fp)
- bicl3 #-65536,-244(fp),r0
- ashl #16,r0,-248(fp)
- addl3 -248(fp),-252(fp),r0
- bicl3 #0,r0,-252(fp)
- cmpl -252(fp),-248(fp)
- bgequ noname.106
- incl -256(fp)
-noname.106:
- movl -252(fp),r1
- movl -256(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.107
- incl r2
-noname.107:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.108
- incl r8
-noname.108:
-
- movzwl 6(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-268(fp)
- bicl3 #-65536,r2,-272(fp)
- mull3 r0,-268(fp),-260(fp)
- mull2 r3,-268(fp)
- mull3 r3,-272(fp),-264(fp)
- mull2 r0,-272(fp)
- addl3 -260(fp),-264(fp),r0
- bicl3 #0,r0,-260(fp)
- cmpl -260(fp),-264(fp)
- bgequ noname.109
- addl2 #65536,-272(fp)
-noname.109:
- movzwl -258(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-272(fp)
- bicl3 #-65536,-260(fp),r0
- ashl #16,r0,-264(fp)
- addl3 -264(fp),-268(fp),r0
- bicl3 #0,r0,-268(fp)
- cmpl -268(fp),-264(fp)
- bgequ noname.110
- incl -272(fp)
-noname.110:
- movl -268(fp),r1
- movl -272(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.111
- incl r2
-noname.111:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.112
- incl r8
-noname.112:
-
- movzwl 10(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-284(fp)
- bicl3 #-65536,r2,-288(fp)
- mull3 r0,-284(fp),-276(fp)
- mull2 r3,-284(fp)
- mull3 r3,-288(fp),-280(fp)
- mull2 r0,-288(fp)
- addl3 -276(fp),-280(fp),r0
- bicl3 #0,r0,-276(fp)
- cmpl -276(fp),-280(fp)
- bgequ noname.113
- addl2 #65536,-288(fp)
-noname.113:
- movzwl -274(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-288(fp)
- bicl3 #-65536,-276(fp),r0
- ashl #16,r0,-280(fp)
- addl3 -280(fp),-284(fp),r0
- bicl3 #0,r0,-284(fp)
- cmpl -284(fp),-280(fp)
- bgequ noname.114
- incl -288(fp)
-noname.114:
- movl -284(fp),r1
- movl -288(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.115
- incl r2
-noname.115:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.116
- incl r8
-noname.116:
-
- movzwl 14(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-300(fp)
- bicl3 #-65536,r2,-304(fp)
- mull3 r0,-300(fp),-292(fp)
- mull2 r3,-300(fp)
- mull3 r3,-304(fp),-296(fp)
- mull2 r0,-304(fp)
- addl3 -292(fp),-296(fp),r0
- bicl3 #0,r0,-292(fp)
- cmpl -292(fp),-296(fp)
- bgequ noname.117
- addl2 #65536,-304(fp)
-noname.117:
- movzwl -290(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-304(fp)
- bicl3 #-65536,-292(fp),r0
- ashl #16,r0,-296(fp)
- addl3 -296(fp),-300(fp),r0
- bicl3 #0,r0,-300(fp)
- cmpl -300(fp),-296(fp)
- bgequ noname.118
- incl -304(fp)
-noname.118:
- movl -300(fp),r1
- movl -304(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.119
- incl r2
-noname.119:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.120
- incl r8
-noname.120:
-
- movzwl 18(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-316(fp)
- bicl3 #-65536,r2,-320(fp)
- mull3 r0,-316(fp),-308(fp)
- mull2 r3,-316(fp)
- mull3 r3,-320(fp),-312(fp)
- mull2 r0,-320(fp)
- addl3 -308(fp),-312(fp),r0
- bicl3 #0,r0,-308(fp)
- cmpl -308(fp),-312(fp)
- bgequ noname.121
- addl2 #65536,-320(fp)
-noname.121:
- movzwl -306(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-320(fp)
- bicl3 #-65536,-308(fp),r0
- ashl #16,r0,-312(fp)
- addl3 -312(fp),-316(fp),r0
- bicl3 #0,r0,-316(fp)
- cmpl -316(fp),-312(fp)
- bgequ noname.122
- incl -320(fp)
-noname.122:
- movl -316(fp),r1
- movl -320(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.123
- incl r2
-
-noname.123:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.124
- incl r8
-noname.124:
-
- movzwl 22(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-332(fp)
- bicl3 #-65536,r2,-336(fp)
- mull3 r0,-332(fp),-324(fp)
- mull2 r3,-332(fp)
- mull3 r3,-336(fp),-328(fp)
- mull2 r0,-336(fp)
- addl3 -324(fp),-328(fp),r0
- bicl3 #0,r0,-324(fp)
- cmpl -324(fp),-328(fp)
- bgequ noname.125
- addl2 #65536,-336(fp)
-noname.125:
- movzwl -322(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-336(fp)
- bicl3 #-65536,-324(fp),r0
- ashl #16,r0,-328(fp)
- addl3 -328(fp),-332(fp),r0
- bicl3 #0,r0,-332(fp)
- cmpl -332(fp),-328(fp)
- bgequ noname.126
- incl -336(fp)
-noname.126:
- movl -332(fp),r1
- movl -336(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.127
- incl r2
-noname.127:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.128
- incl r8
-noname.128:
-
- movl r10,20(r11)
-
- clrl r10
-
- movzwl 26(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,24(r6),-348(fp)
- bicl3 #-65536,r2,-352(fp)
- mull3 r0,-348(fp),-340(fp)
- mull2 r3,-348(fp)
- mull3 r3,-352(fp),-344(fp)
- mull2 r0,-352(fp)
- addl3 -340(fp),-344(fp),r0
- bicl3 #0,r0,-340(fp)
- cmpl -340(fp),-344(fp)
- bgequ noname.129
- addl2 #65536,-352(fp)
-noname.129:
- movzwl -338(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-352(fp)
- bicl3 #-65536,-340(fp),r0
- ashl #16,r0,-344(fp)
- addl3 -344(fp),-348(fp),r0
- bicl3 #0,r0,-348(fp)
- cmpl -348(fp),-344(fp)
- bgequ noname.130
- incl -352(fp)
-noname.130:
- movl -348(fp),r1
- movl -352(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.131
- incl r2
-noname.131:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.132
- incl r10
-noname.132:
-
- movzwl 22(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-364(fp)
- bicl3 #-65536,r2,-368(fp)
- mull3 r0,-364(fp),-356(fp)
- mull2 r3,-364(fp)
- mull3 r3,-368(fp),-360(fp)
- mull2 r0,-368(fp)
- addl3 -356(fp),-360(fp),r0
- bicl3 #0,r0,-356(fp)
- cmpl -356(fp),-360(fp)
- bgequ noname.133
- addl2 #65536,-368(fp)
-noname.133:
- movzwl -354(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-368(fp)
- bicl3 #-65536,-356(fp),r0
- ashl #16,r0,-360(fp)
- addl3 -360(fp),-364(fp),r0
- bicl3 #0,r0,-364(fp)
- cmpl -364(fp),-360(fp)
- bgequ noname.134
- incl -368(fp)
-noname.134:
- movl -364(fp),r1
- movl -368(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.135
- incl r2
-noname.135:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.136
- incl r10
-noname.136:
-
- movzwl 18(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-380(fp)
- bicl3 #-65536,r2,-384(fp)
- mull3 r0,-380(fp),-372(fp)
- mull2 r3,-380(fp)
- mull3 r3,-384(fp),-376(fp)
- mull2 r0,-384(fp)
- addl3 -372(fp),-376(fp),r0
- bicl3 #0,r0,-372(fp)
- cmpl -372(fp),-376(fp)
- bgequ noname.137
- addl2 #65536,-384(fp)
-noname.137:
- movzwl -370(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-384(fp)
- bicl3 #-65536,-372(fp),r0
- ashl #16,r0,-376(fp)
- addl3 -376(fp),-380(fp),r0
- bicl3 #0,r0,-380(fp)
- cmpl -380(fp),-376(fp)
- bgequ noname.138
- incl -384(fp)
-noname.138:
- movl -380(fp),r1
- movl -384(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.139
- incl r2
-noname.139:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.140
- incl r10
-noname.140:
-
- movzwl 14(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-396(fp)
- bicl3 #-65536,r2,-400(fp)
- mull3 r0,-396(fp),-388(fp)
- mull2 r3,-396(fp)
- mull3 r3,-400(fp),-392(fp)
- mull2 r0,-400(fp)
- addl3 -388(fp),-392(fp),r0
- bicl3 #0,r0,-388(fp)
- cmpl -388(fp),-392(fp)
- bgequ noname.141
- addl2 #65536,-400(fp)
-noname.141:
- movzwl -386(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-400(fp)
- bicl3 #-65536,-388(fp),r0
- ashl #16,r0,-392(fp)
- addl3 -392(fp),-396(fp),r0
- bicl3 #0,r0,-396(fp)
- cmpl -396(fp),-392(fp)
- bgequ noname.142
- incl -400(fp)
-noname.142:
- movl -396(fp),r1
- movl -400(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.143
- incl r2
-noname.143:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.144
- incl r10
-noname.144:
-
- movzwl 10(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-412(fp)
- bicl3 #-65536,r2,-416(fp)
- mull3 r0,-412(fp),-404(fp)
- mull2 r3,-412(fp)
- mull3 r3,-416(fp),-408(fp)
- mull2 r0,-416(fp)
- addl3 -404(fp),-408(fp),r0
- bicl3 #0,r0,-404(fp)
- cmpl -404(fp),-408(fp)
- bgequ noname.145
- addl2 #65536,-416(fp)
-noname.145:
- movzwl -402(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-416(fp)
- bicl3 #-65536,-404(fp),r0
- ashl #16,r0,-408(fp)
- addl3 -408(fp),-412(fp),r0
- bicl3 #0,r0,-412(fp)
- cmpl -412(fp),-408(fp)
- bgequ noname.146
- incl -416(fp)
-noname.146:
- movl -412(fp),r1
- movl -416(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.147
- incl r2
-noname.147:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.148
- incl r10
-noname.148:
-
- movzwl 6(r6),r2
- bicl3 #-65536,20(r7),r3
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-428(fp)
- bicl3 #-65536,r2,-432(fp)
- mull3 r0,-428(fp),-420(fp)
- mull2 r3,-428(fp)
- mull3 r3,-432(fp),-424(fp)
- mull2 r0,-432(fp)
- addl3 -420(fp),-424(fp),r0
- bicl3 #0,r0,-420(fp)
- cmpl -420(fp),-424(fp)
- bgequ noname.149
- addl2 #65536,-432(fp)
-noname.149:
- movzwl -418(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-432(fp)
- bicl3 #-65536,-420(fp),r0
- ashl #16,r0,-424(fp)
- addl3 -424(fp),-428(fp),r0
- bicl3 #0,r0,-428(fp)
- cmpl -428(fp),-424(fp)
- bgequ noname.150
- incl -432(fp)
-noname.150:
- movl -428(fp),r1
- movl -432(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.151
- incl r2
-noname.151:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.152
- incl r10
-noname.152:
-
- movzwl 2(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-444(fp)
- bicl3 #-65536,r2,-448(fp)
- mull3 r0,-444(fp),-436(fp)
- mull2 r3,-444(fp)
- mull3 r3,-448(fp),-440(fp)
- mull2 r0,-448(fp)
- addl3 -436(fp),-440(fp),r0
- bicl3 #0,r0,-436(fp)
- cmpl -436(fp),-440(fp)
- bgequ noname.153
- addl2 #65536,-448(fp)
-noname.153:
- movzwl -434(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-448(fp)
- bicl3 #-65536,-436(fp),r0
- ashl #16,r0,-440(fp)
- addl3 -440(fp),-444(fp),r0
- bicl3 #0,r0,-444(fp)
- cmpl -444(fp),-440(fp)
- bgequ noname.154
- incl -448(fp)
-noname.154:
- movl -444(fp),r1
- movl -448(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.155
- incl r2
-noname.155:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.156
- incl r10
-noname.156:
-
- movl r9,24(r11)
-
- clrl r9
-
- movzwl 2(r6),r2
- bicl3 #-65536,28(r7),r3
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,(r6),-460(fp)
- bicl3 #-65536,r2,-464(fp)
- mull3 r0,-460(fp),-452(fp)
- mull2 r3,-460(fp)
- mull3 r3,-464(fp),-456(fp)
- mull2 r0,-464(fp)
- addl3 -452(fp),-456(fp),r0
- bicl3 #0,r0,-452(fp)
- cmpl -452(fp),-456(fp)
- bgequ noname.157
- addl2 #65536,-464(fp)
-noname.157:
- movzwl -450(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-464(fp)
- bicl3 #-65536,-452(fp),r0
- ashl #16,r0,-456(fp)
- addl3 -456(fp),-460(fp),r0
- bicl3 #0,r0,-460(fp)
- cmpl -460(fp),-456(fp)
- bgequ noname.158
- incl -464(fp)
-noname.158:
- movl -460(fp),r1
- movl -464(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.159
- incl r2
-noname.159:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.160
- incl r9
-noname.160:
-
- movzwl 6(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-476(fp)
- bicl3 #-65536,r2,-480(fp)
- mull3 r0,-476(fp),-468(fp)
- mull2 r3,-476(fp)
- mull3 r3,-480(fp),-472(fp)
- mull2 r0,-480(fp)
- addl3 -468(fp),-472(fp),r0
- bicl3 #0,r0,-468(fp)
- cmpl -468(fp),-472(fp)
- bgequ noname.161
- addl2 #65536,-480(fp)
-noname.161:
- movzwl -466(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-480(fp)
- bicl3 #-65536,-468(fp),r0
- ashl #16,r0,-472(fp)
- addl3 -472(fp),-476(fp),r0
- bicl3 #0,r0,-476(fp)
- cmpl -476(fp),-472(fp)
- bgequ noname.162
- incl -480(fp)
-noname.162:
- movl -476(fp),r1
- movl -480(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.163
- incl r2
-noname.163:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.164
- incl r9
-noname.164:
-
- movzwl 10(r6),r2
- bicl3 #-65536,20(r7),r3
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-492(fp)
- bicl3 #-65536,r2,-496(fp)
- mull3 r0,-492(fp),-484(fp)
- mull2 r3,-492(fp)
- mull3 r3,-496(fp),-488(fp)
- mull2 r0,-496(fp)
- addl3 -484(fp),-488(fp),r0
- bicl3 #0,r0,-484(fp)
- cmpl -484(fp),-488(fp)
- bgequ noname.165
- addl2 #65536,-496(fp)
-noname.165:
- movzwl -482(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-496(fp)
- bicl3 #-65536,-484(fp),r0
- ashl #16,r0,-488(fp)
- addl3 -488(fp),-492(fp),r0
- bicl3 #0,r0,-492(fp)
- cmpl -492(fp),-488(fp)
- bgequ noname.166
- incl -496(fp)
-noname.166:
- movl -492(fp),r1
- movl -496(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.167
- incl r2
-noname.167:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.168
- incl r9
-noname.168:
-
- movzwl 14(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-508(fp)
- bicl3 #-65536,r2,-512(fp)
- mull3 r0,-508(fp),-500(fp)
- mull2 r3,-508(fp)
- mull3 r3,-512(fp),-504(fp)
- mull2 r0,-512(fp)
- addl3 -500(fp),-504(fp),r0
- bicl3 #0,r0,-500(fp)
- cmpl -500(fp),-504(fp)
- bgequ noname.169
- addl2 #65536,-512(fp)
-noname.169:
- movzwl -498(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-512(fp)
- bicl3 #-65536,-500(fp),r0
- ashl #16,r0,-504(fp)
- addl3 -504(fp),-508(fp),r0
- bicl3 #0,r0,-508(fp)
- cmpl -508(fp),-504(fp)
- bgequ noname.170
- incl -512(fp)
-noname.170:
- movl -508(fp),r1
- movl -512(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.171
- incl r2
-noname.171:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.172
- incl r9
-noname.172:
-
- movzwl 18(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-524(fp)
- bicl3 #-65536,r2,-528(fp)
- mull3 r0,-524(fp),-516(fp)
- mull2 r3,-524(fp)
- mull3 r3,-528(fp),-520(fp)
- mull2 r0,-528(fp)
- addl3 -516(fp),-520(fp),r0
- bicl3 #0,r0,-516(fp)
- cmpl -516(fp),-520(fp)
- bgequ noname.173
- addl2 #65536,-528(fp)
-noname.173:
- movzwl -514(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-528(fp)
- bicl3 #-65536,-516(fp),r0
- ashl #16,r0,-520(fp)
- addl3 -520(fp),-524(fp),r0
- bicl3 #0,r0,-524(fp)
- cmpl -524(fp),-520(fp)
- bgequ noname.174
- incl -528(fp)
-noname.174:
- movl -524(fp),r1
- movl -528(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.175
- incl r2
-noname.175:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.176
- incl r9
-noname.176:
-
- movzwl 22(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-540(fp)
- bicl3 #-65536,r2,-544(fp)
- mull3 r0,-540(fp),-532(fp)
- mull2 r3,-540(fp)
- mull3 r3,-544(fp),-536(fp)
- mull2 r0,-544(fp)
- addl3 -532(fp),-536(fp),r0
- bicl3 #0,r0,-532(fp)
- cmpl -532(fp),-536(fp)
- bgequ noname.177
- addl2 #65536,-544(fp)
-noname.177:
- movzwl -530(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-544(fp)
- bicl3 #-65536,-532(fp),r0
- ashl #16,r0,-536(fp)
- addl3 -536(fp),-540(fp),r0
- bicl3 #0,r0,-540(fp)
- cmpl -540(fp),-536(fp)
- bgequ noname.178
- incl -544(fp)
-noname.178:
- movl -540(fp),r1
- movl -544(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.179
- incl r2
-noname.179:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.180
- incl r9
-noname.180:
-
- movzwl 26(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,24(r6),-556(fp)
- bicl3 #-65536,r2,-560(fp)
- mull3 r0,-556(fp),-548(fp)
- mull2 r3,-556(fp)
- mull3 r3,-560(fp),-552(fp)
- mull2 r0,-560(fp)
- addl3 -548(fp),-552(fp),r0
- bicl3 #0,r0,-548(fp)
- cmpl -548(fp),-552(fp)
- bgequ noname.181
- addl2 #65536,-560(fp)
-noname.181:
- movzwl -546(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-560(fp)
- bicl3 #-65536,-548(fp),r0
- ashl #16,r0,-552(fp)
- addl3 -552(fp),-556(fp),r0
- bicl3 #0,r0,-556(fp)
- cmpl -556(fp),-552(fp)
- bgequ noname.182
- incl -560(fp)
-noname.182:
- movl -556(fp),r1
- movl -560(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.183
- incl r2
-noname.183:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.184
- incl r9
-noname.184:
-
- movzwl 30(r6),r2
- bicl3 #-65536,(r7),r3
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,28(r6),-572(fp)
- bicl3 #-65536,r2,-576(fp)
- mull3 r0,-572(fp),-564(fp)
- mull2 r3,-572(fp)
- mull3 r3,-576(fp),-568(fp)
- mull2 r0,-576(fp)
- addl3 -564(fp),-568(fp),r0
- bicl3 #0,r0,-564(fp)
- cmpl -564(fp),-568(fp)
- bgequ noname.185
- addl2 #65536,-576(fp)
-noname.185:
- movzwl -562(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-576(fp)
- bicl3 #-65536,-564(fp),r0
- ashl #16,r0,-568(fp)
- addl3 -568(fp),-572(fp),r0
- bicl3 #0,r0,-572(fp)
- cmpl -572(fp),-568(fp)
- bgequ noname.186
- incl -576(fp)
-noname.186:
- movl -572(fp),r1
- movl -576(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.187
- incl r2
-noname.187:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.188
- incl r9
-noname.188:
-
- movl r8,28(r11)
-
- clrl r8
-
- movzwl 30(r6),r2
- bicl3 #-65536,4(r7),r3
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,28(r6),-588(fp)
- bicl3 #-65536,r2,-592(fp)
- mull3 r0,-588(fp),-580(fp)
- mull2 r3,-588(fp)
- mull3 r3,-592(fp),-584(fp)
- mull2 r0,-592(fp)
- addl3 -580(fp),-584(fp),r0
- bicl3 #0,r0,-580(fp)
- cmpl -580(fp),-584(fp)
- bgequ noname.189
- addl2 #65536,-592(fp)
-noname.189:
- movzwl -578(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-592(fp)
- bicl3 #-65536,-580(fp),r0
- ashl #16,r0,-584(fp)
- addl3 -584(fp),-588(fp),r0
- bicl3 #0,r0,-588(fp)
- cmpl -588(fp),-584(fp)
- bgequ noname.190
- incl -592(fp)
-noname.190:
- movl -588(fp),r1
- movl -592(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.191
- incl r2
-noname.191:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.192
- incl r8
-noname.192:
-
- movzwl 26(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,24(r6),-604(fp)
- bicl3 #-65536,r2,-608(fp)
- mull3 r0,-604(fp),-596(fp)
- mull2 r3,-604(fp)
- mull3 r3,-608(fp),-600(fp)
- mull2 r0,-608(fp)
- addl3 -596(fp),-600(fp),r0
- bicl3 #0,r0,-596(fp)
- cmpl -596(fp),-600(fp)
- bgequ noname.193
- addl2 #65536,-608(fp)
-noname.193:
- movzwl -594(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-608(fp)
- bicl3 #-65536,-596(fp),r0
- ashl #16,r0,-600(fp)
- addl3 -600(fp),-604(fp),r0
- bicl3 #0,r0,-604(fp)
- cmpl -604(fp),-600(fp)
- bgequ noname.194
- incl -608(fp)
-noname.194:
- movl -604(fp),r1
- movl -608(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.195
- incl r2
-noname.195:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.196
- incl r8
-noname.196:
-
- movzwl 22(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-620(fp)
- bicl3 #-65536,r2,-624(fp)
- mull3 r0,-620(fp),-612(fp)
- mull2 r3,-620(fp)
- mull3 r3,-624(fp),-616(fp)
- mull2 r0,-624(fp)
- addl3 -612(fp),-616(fp),r0
- bicl3 #0,r0,-612(fp)
- cmpl -612(fp),-616(fp)
- bgequ noname.197
- addl2 #65536,-624(fp)
-noname.197:
- movzwl -610(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-624(fp)
- bicl3 #-65536,-612(fp),r0
- ashl #16,r0,-616(fp)
- addl3 -616(fp),-620(fp),r0
- bicl3 #0,r0,-620(fp)
- cmpl -620(fp),-616(fp)
- bgequ noname.198
- incl -624(fp)
-noname.198:
- movl -620(fp),r1
- movl -624(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.199
- incl r2
-noname.199:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.200
- incl r8
-noname.200:
-
- movzwl 18(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-636(fp)
- bicl3 #-65536,r2,-640(fp)
- mull3 r0,-636(fp),-628(fp)
- mull2 r3,-636(fp)
- mull3 r3,-640(fp),-632(fp)
- mull2 r0,-640(fp)
- addl3 -628(fp),-632(fp),r0
- bicl3 #0,r0,-628(fp)
- cmpl -628(fp),-632(fp)
- bgequ noname.201
- addl2 #65536,-640(fp)
-noname.201:
- movzwl -626(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-640(fp)
- bicl3 #-65536,-628(fp),r0
- ashl #16,r0,-632(fp)
- addl3 -632(fp),-636(fp),r0
- bicl3 #0,r0,-636(fp)
- cmpl -636(fp),-632(fp)
- bgequ noname.202
- incl -640(fp)
-noname.202:
- movl -636(fp),r1
- movl -640(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.203
- incl r2
-noname.203:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.204
- incl r8
-noname.204:
-
- movzwl 14(r6),r2
- bicl3 #-65536,20(r7),r3
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-652(fp)
- bicl3 #-65536,r2,-656(fp)
- mull3 r0,-652(fp),-644(fp)
- mull2 r3,-652(fp)
- mull3 r3,-656(fp),-648(fp)
- mull2 r0,-656(fp)
- addl3 -644(fp),-648(fp),r0
- bicl3 #0,r0,-644(fp)
- cmpl -644(fp),-648(fp)
- bgequ noname.205
- addl2 #65536,-656(fp)
-noname.205:
- movzwl -642(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-656(fp)
- bicl3 #-65536,-644(fp),r0
- ashl #16,r0,-648(fp)
- addl3 -648(fp),-652(fp),r0
- bicl3 #0,r0,-652(fp)
- cmpl -652(fp),-648(fp)
- bgequ noname.206
- incl -656(fp)
-noname.206:
- movl -652(fp),r1
- movl -656(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.207
- incl r2
-noname.207:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.208
- incl r8
-noname.208:
-
- movzwl 10(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-668(fp)
- bicl3 #-65536,r2,-672(fp)
- mull3 r0,-668(fp),-660(fp)
- mull2 r3,-668(fp)
- mull3 r3,-672(fp),-664(fp)
- mull2 r0,-672(fp)
- addl3 -660(fp),-664(fp),r0
- bicl3 #0,r0,-660(fp)
- cmpl -660(fp),-664(fp)
- bgequ noname.209
- addl2 #65536,-672(fp)
-noname.209:
- movzwl -658(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-672(fp)
- bicl3 #-65536,-660(fp),r0
- ashl #16,r0,-664(fp)
- addl3 -664(fp),-668(fp),r0
- bicl3 #0,r0,-668(fp)
- cmpl -668(fp),-664(fp)
- bgequ noname.210
- incl -672(fp)
-noname.210:
- movl -668(fp),r1
- movl -672(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.211
- incl r2
-noname.211:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.212
- incl r8
-noname.212:
-
- movzwl 6(r6),r2
- bicl3 #-65536,28(r7),r3
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-684(fp)
- bicl3 #-65536,r2,-688(fp)
- mull3 r0,-684(fp),-676(fp)
- mull2 r3,-684(fp)
- mull3 r3,-688(fp),-680(fp)
- mull2 r0,-688(fp)
- addl3 -676(fp),-680(fp),r0
- bicl3 #0,r0,-676(fp)
- cmpl -676(fp),-680(fp)
- bgequ noname.213
- addl2 #65536,-688(fp)
-noname.213:
- movzwl -674(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-688(fp)
- bicl3 #-65536,-676(fp),r0
- ashl #16,r0,-680(fp)
- addl3 -680(fp),-684(fp),r0
- bicl3 #0,r0,-684(fp)
- cmpl -684(fp),-680(fp)
- bgequ noname.214
- incl -688(fp)
-noname.214:
- movl -684(fp),r1
- movl -688(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.215
- incl r2
-noname.215:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.216
- incl r8
-noname.216:
-
- movl r10,32(r11)
-
- clrl r10
-
- movzwl 10(r6),r2
- bicl3 #-65536,28(r7),r3
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r6),-700(fp)
- bicl3 #-65536,r2,-704(fp)
- mull3 r0,-700(fp),-692(fp)
- mull2 r3,-700(fp)
- mull3 r3,-704(fp),-696(fp)
- mull2 r0,-704(fp)
- addl3 -692(fp),-696(fp),r0
- bicl3 #0,r0,-692(fp)
- cmpl -692(fp),-696(fp)
- bgequ noname.217
- addl2 #65536,-704(fp)
-noname.217:
- movzwl -690(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-704(fp)
- bicl3 #-65536,-692(fp),r0
- ashl #16,r0,-696(fp)
- addl3 -696(fp),-700(fp),r0
- bicl3 #0,r0,-700(fp)
- cmpl -700(fp),-696(fp)
- bgequ noname.218
- incl -704(fp)
-noname.218:
- movl -700(fp),r1
- movl -704(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.219
- incl r2
-noname.219:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.220
- incl r10
-noname.220:
-
- movzwl 14(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-716(fp)
- bicl3 #-65536,r2,-720(fp)
- mull3 r0,-716(fp),-708(fp)
- mull2 r3,-716(fp)
- mull3 r3,-720(fp),-712(fp)
- mull2 r0,-720(fp)
- addl3 -708(fp),-712(fp),r0
- bicl3 #0,r0,-708(fp)
- cmpl -708(fp),-712(fp)
- bgequ noname.221
- addl2 #65536,-720(fp)
-noname.221:
- movzwl -706(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-720(fp)
- bicl3 #-65536,-708(fp),r0
- ashl #16,r0,-712(fp)
- addl3 -712(fp),-716(fp),r0
- bicl3 #0,r0,-716(fp)
- cmpl -716(fp),-712(fp)
- bgequ noname.222
- incl -720(fp)
-noname.222:
- movl -716(fp),r1
- movl -720(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.223
- incl r2
-noname.223:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.224
- incl r10
-noname.224:
-
- movzwl 18(r6),r2
- bicl3 #-65536,20(r7),r3
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r6),-732(fp)
- bicl3 #-65536,r2,-736(fp)
- mull3 r0,-732(fp),-724(fp)
- mull2 r3,-732(fp)
- mull3 r3,-736(fp),-728(fp)
- mull2 r0,-736(fp)
- addl3 -724(fp),-728(fp),r0
- bicl3 #0,r0,-724(fp)
- cmpl -724(fp),-728(fp)
- bgequ noname.225
- addl2 #65536,-736(fp)
-noname.225:
- movzwl -722(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-736(fp)
- bicl3 #-65536,-724(fp),r0
- ashl #16,r0,-728(fp)
- addl3 -728(fp),-732(fp),r0
- bicl3 #0,r0,-732(fp)
- cmpl -732(fp),-728(fp)
- bgequ noname.226
- incl -736(fp)
-noname.226:
- movl -732(fp),r1
- movl -736(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.227
- incl r2
-noname.227:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.228
- incl r10
-noname.228:
-
- movzwl 22(r6),r2
- bicl3 #-65536,16(r7),r3
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-748(fp)
- bicl3 #-65536,r2,-752(fp)
- mull3 r0,-748(fp),-740(fp)
- mull2 r3,-748(fp)
- mull3 r3,-752(fp),-744(fp)
- mull2 r0,-752(fp)
- addl3 -740(fp),-744(fp),r0
- bicl3 #0,r0,-740(fp)
- cmpl -740(fp),-744(fp)
- bgequ noname.229
- addl2 #65536,-752(fp)
-noname.229:
- movzwl -738(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-752(fp)
- bicl3 #-65536,-740(fp),r0
- ashl #16,r0,-744(fp)
- addl3 -744(fp),-748(fp),r0
- bicl3 #0,r0,-748(fp)
- cmpl -748(fp),-744(fp)
- bgequ noname.230
- incl -752(fp)
-noname.230:
- movl -748(fp),r1
- movl -752(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.231
- incl r2
-noname.231:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.232
- incl r10
-noname.232:
-
- movzwl 26(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,24(r6),-764(fp)
- bicl3 #-65536,r2,-768(fp)
- mull3 r0,-764(fp),-756(fp)
- mull2 r3,-764(fp)
- mull3 r3,-768(fp),-760(fp)
- mull2 r0,-768(fp)
- addl3 -756(fp),-760(fp),r0
- bicl3 #0,r0,-756(fp)
- cmpl -756(fp),-760(fp)
- bgequ noname.233
- addl2 #65536,-768(fp)
-noname.233:
- movzwl -754(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-768(fp)
- bicl3 #-65536,-756(fp),r0
- ashl #16,r0,-760(fp)
- addl3 -760(fp),-764(fp),r0
- bicl3 #0,r0,-764(fp)
- cmpl -764(fp),-760(fp)
- bgequ noname.234
- incl -768(fp)
-noname.234:
- movl -764(fp),r1
- movl -768(fp),r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.235
- incl r2
-noname.235:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.236
- incl r10
-noname.236:
-
- bicl3 #-65536,28(r6),r3
- movzwl 30(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r7),r2
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-772(fp)
- mull2 r2,r5
- mull3 r2,r4,-776(fp)
- mull2 r0,r4
- addl3 -772(fp),-776(fp),r0
- bicl3 #0,r0,-772(fp)
- cmpl -772(fp),-776(fp)
- bgequ noname.237
- addl2 #65536,r4
-noname.237:
- movzwl -770(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-772(fp),r0
- ashl #16,r0,-776(fp)
- addl2 -776(fp),r5
- bicl2 #0,r5
- cmpl r5,-776(fp)
- bgequ noname.238
- incl r4
-noname.238:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.239
- incl r2
-noname.239:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.240
- incl r10
-noname.240:
-
- movl r9,36(r11)
-
- clrl r9
-
- bicl3 #-65536,28(r6),r3
- movzwl 30(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r7),r2
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-780(fp)
- mull2 r2,r5
- mull3 r2,r4,-784(fp)
- mull2 r0,r4
- addl3 -780(fp),-784(fp),r0
- bicl3 #0,r0,-780(fp)
- cmpl -780(fp),-784(fp)
- bgequ noname.241
- addl2 #65536,r4
-noname.241:
- movzwl -778(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-780(fp),r0
- ashl #16,r0,-784(fp)
- addl2 -784(fp),r5
- bicl2 #0,r5
- cmpl r5,-784(fp)
- bgequ noname.242
- incl r4
-noname.242:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.243
- incl r2
-noname.243:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.244
- incl r9
-noname.244:
-
- bicl3 #-65536,24(r6),r3
- movzwl 26(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r7),r2
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-788(fp)
- mull2 r2,r5
- mull3 r2,r4,-792(fp)
- mull2 r0,r4
- addl3 -788(fp),-792(fp),r0
- bicl3 #0,r0,-788(fp)
- cmpl -788(fp),-792(fp)
- bgequ noname.245
- addl2 #65536,r4
-noname.245:
- movzwl -786(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-788(fp),r0
- ashl #16,r0,-792(fp)
- addl2 -792(fp),r5
- bicl2 #0,r5
- cmpl r5,-792(fp)
- bgequ noname.246
- incl r4
-noname.246:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.247
- incl r2
-noname.247:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.248
- incl r9
-noname.248:
-
- bicl3 #-65536,20(r6),r3
- movzwl 22(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r7),r2
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-796(fp)
- mull2 r2,r5
- mull3 r2,r4,-800(fp)
- mull2 r0,r4
- addl3 -796(fp),-800(fp),r0
- bicl3 #0,r0,-796(fp)
- cmpl -796(fp),-800(fp)
- bgequ noname.249
- addl2 #65536,r4
-noname.249:
- movzwl -794(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-796(fp),r0
- ashl #16,r0,-800(fp)
- addl2 -800(fp),r5
- bicl2 #0,r5
- cmpl r5,-800(fp)
- bgequ noname.250
- incl r4
-noname.250:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.251
- incl r2
-noname.251:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.252
- incl r9
-noname.252:
-
- bicl3 #-65536,16(r6),r3
- movzwl 18(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,24(r7),r2
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-804(fp)
- mull2 r2,r5
- mull3 r2,r4,-808(fp)
- mull2 r0,r4
- addl3 -804(fp),-808(fp),r0
- bicl3 #0,r0,-804(fp)
- cmpl -804(fp),-808(fp)
- bgequ noname.253
- addl2 #65536,r4
-noname.253:
- movzwl -802(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-804(fp),r0
- ashl #16,r0,-808(fp)
- addl2 -808(fp),r5
- bicl2 #0,r5
- cmpl r5,-808(fp)
- bgequ noname.254
- incl r4
-noname.254:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.255
- incl r2
-noname.255:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.256
- incl r9
-noname.256:
-
- bicl3 #-65536,12(r6),r3
- movzwl 14(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,28(r7),r2
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-812(fp)
- mull2 r2,r5
- mull3 r2,r4,-816(fp)
- mull2 r0,r4
- addl3 -812(fp),-816(fp),r0
- bicl3 #0,r0,-812(fp)
- cmpl -812(fp),-816(fp)
- bgequ noname.257
- addl2 #65536,r4
-noname.257:
- movzwl -810(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-812(fp),r0
- ashl #16,r0,-816(fp)
- addl2 -816(fp),r5
- bicl2 #0,r5
- cmpl r5,-816(fp)
- bgequ noname.258
- incl r4
-noname.258:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.259
- incl r2
-noname.259:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.260
- incl r9
-noname.260:
-
- movl r8,40(r11)
-
- clrl r8
-
- bicl3 #-65536,16(r6),r3
- movzwl 18(r6),r2
- bicl3 #-65536,28(r7),r1
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- movl r3,r4
- bicl3 #-65536,r2,-828(fp)
- mull3 r0,r4,-820(fp)
- mull2 r1,r4
- mull3 r1,-828(fp),-824(fp)
- mull2 r0,-828(fp)
- addl3 -820(fp),-824(fp),r0
- bicl3 #0,r0,-820(fp)
- cmpl -820(fp),-824(fp)
- bgequ noname.261
- addl2 #65536,-828(fp)
-noname.261:
- movzwl -818(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-828(fp)
- bicl3 #-65536,-820(fp),r0
- ashl #16,r0,-824(fp)
- addl2 -824(fp),r4
- bicl2 #0,r4
- cmpl r4,-824(fp)
- bgequ noname.262
- incl -828(fp)
-noname.262:
- movl r4,r1
- movl -828(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.263
- incl r2
-noname.263:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.264
- incl r8
-noname.264:
-
- movzwl 22(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,20(r6),-840(fp)
- bicl3 #-65536,r2,-844(fp)
- mull3 r0,-840(fp),-832(fp)
- mull2 r3,-840(fp)
- mull3 r3,-844(fp),-836(fp)
- mull2 r0,-844(fp)
- addl3 -832(fp),-836(fp),r0
- bicl3 #0,r0,-832(fp)
- cmpl -832(fp),-836(fp)
- bgequ noname.265
- addl2 #65536,-844(fp)
-noname.265:
- movzwl -830(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-844(fp)
- bicl3 #-65536,-832(fp),r0
- ashl #16,r0,-836(fp)
- addl3 -836(fp),-840(fp),r0
- bicl3 #0,r0,-840(fp)
- cmpl -840(fp),-836(fp)
- bgequ noname.266
- incl -844(fp)
-noname.266:
- movl -840(fp),r1
- movl -844(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.267
- incl r2
-noname.267:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.268
- incl r8
-noname.268:
-
- bicl3 #-65536,24(r6),r3
- movzwl 26(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r7),r2
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-848(fp)
- mull2 r2,r5
- mull3 r2,r4,-852(fp)
- mull2 r0,r4
- addl3 -848(fp),-852(fp),r0
- bicl3 #0,r0,-848(fp)
- cmpl -848(fp),-852(fp)
- bgequ noname.269
- addl2 #65536,r4
-noname.269:
- movzwl -846(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-848(fp),r0
- ashl #16,r0,-852(fp)
- addl2 -852(fp),r5
- bicl2 #0,r5
- cmpl r5,-852(fp)
- bgequ noname.270
- incl r4
-noname.270:
- movl r5,r1
- movl r4,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.271
- incl r2
-noname.271:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.272
- incl r8
-noname.272:
-
- bicl3 #-65536,28(r6),r3
- movzwl 30(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r7),r2
- movzwl 18(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-856(fp)
- mull2 r2,r5
- mull3 r2,r4,-860(fp)
- mull2 r0,r4
- addl3 -856(fp),-860(fp),r0
- bicl3 #0,r0,-856(fp)
- cmpl -856(fp),-860(fp)
- bgequ noname.273
- addl2 #65536,r4
-noname.273:
- movzwl -854(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-856(fp),r0
- ashl #16,r0,-860(fp)
- addl2 -860(fp),r5
- bicl2 #0,r5
- cmpl r5,-860(fp)
- bgequ noname.274
- incl r4
-noname.274:
- movl r5,r1
- movl r4,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.275
- incl r2
-noname.275:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.276
- incl r8
-noname.276:
-
- movl r10,44(r11)
-
- clrl r10
-
- bicl3 #-65536,28(r6),r3
- movzwl 30(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r7),r2
- movzwl 22(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-864(fp)
- mull2 r2,r5
- mull3 r2,r4,-868(fp)
- mull2 r0,r4
- addl3 -864(fp),-868(fp),r0
- bicl3 #0,r0,-864(fp)
- cmpl -864(fp),-868(fp)
- bgequ noname.277
- addl2 #65536,r4
-noname.277:
- movzwl -862(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-864(fp),r0
- ashl #16,r0,-868(fp)
- addl2 -868(fp),r5
- bicl2 #0,r5
- cmpl r5,-868(fp)
- bgequ noname.278
- incl r4
-noname.278:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.279
- incl r2
-noname.279:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.280
- incl r10
-noname.280:
-
- bicl3 #-65536,24(r6),r3
- movzwl 26(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,24(r7),r2
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-872(fp)
- mull2 r2,r5
- mull3 r2,r4,-876(fp)
- mull2 r0,r4
- addl3 -872(fp),-876(fp),r0
- bicl3 #0,r0,-872(fp)
- cmpl -872(fp),-876(fp)
- bgequ noname.281
- addl2 #65536,r4
-noname.281:
- movzwl -870(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-872(fp),r0
- ashl #16,r0,-876(fp)
- addl2 -876(fp),r5
- bicl2 #0,r5
- cmpl r5,-876(fp)
- bgequ noname.282
- incl r4
-noname.282:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.283
- incl r2
-noname.283:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.284
- incl r10
-noname.284:
-
- bicl3 #-65536,20(r6),r3
- movzwl 22(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,28(r7),r2
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-880(fp)
- mull2 r2,r5
- mull3 r2,r4,-884(fp)
- mull2 r0,r4
- addl3 -880(fp),-884(fp),r0
- bicl3 #0,r0,-880(fp)
- cmpl -880(fp),-884(fp)
- bgequ noname.285
- addl2 #65536,r4
-noname.285:
- movzwl -878(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-880(fp),r0
- ashl #16,r0,-884(fp)
- addl2 -884(fp),r5
- bicl2 #0,r5
- cmpl r5,-884(fp)
- bgequ noname.286
- incl r4
-noname.286:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.287
- incl r2
-noname.287:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.288
- incl r10
-noname.288:
-
- movl r9,48(r11)
-
- clrl r9
-
- bicl3 #-65536,24(r6),r3
- movzwl 26(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,28(r7),r2
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-888(fp)
- mull2 r2,r5
- mull3 r2,r4,-892(fp)
- mull2 r0,r4
- addl3 -888(fp),-892(fp),r0
- bicl3 #0,r0,-888(fp)
- cmpl -888(fp),-892(fp)
- bgequ noname.289
- addl2 #65536,r4
-noname.289:
- movzwl -886(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-888(fp),r0
- ashl #16,r0,-892(fp)
- addl2 -892(fp),r5
- bicl2 #0,r5
- cmpl r5,-892(fp)
- bgequ noname.290
- incl r4
-noname.290:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.291
- incl r2
-noname.291:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.292
- incl r9
-noname.292:
-
- movzwl 30(r6),r2
- bicl3 #-65536,24(r7),r3
- movzwl 26(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,28(r6),-904(fp)
- bicl3 #-65536,r2,-908(fp)
- mull3 r0,-904(fp),-896(fp)
- mull2 r3,-904(fp)
- mull3 r3,-908(fp),-900(fp)
- mull2 r0,-908(fp)
- addl3 -896(fp),-900(fp),r0
- bicl3 #0,r0,-896(fp)
- cmpl -896(fp),-900(fp)
- bgequ noname.293
- addl2 #65536,-908(fp)
-noname.293:
- movzwl -894(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-908(fp)
- bicl3 #-65536,-896(fp),r0
- ashl #16,r0,-900(fp)
- addl3 -900(fp),-904(fp),r0
- bicl3 #0,r0,-904(fp)
- cmpl -904(fp),-900(fp)
- bgequ noname.294
- incl -908(fp)
-noname.294:
- movl -904(fp),r1
- movl -908(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.295
- incl r2
-noname.295:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.296
- incl r9
-noname.296:
-
- movl r8,52(r11)
-
- clrl r8
-
- movzwl 30(r6),r2
- bicl3 #-65536,28(r7),r3
- movzwl 30(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,28(r6),-920(fp)
- bicl3 #-65536,r2,-924(fp)
- mull3 r0,-920(fp),-912(fp)
- mull2 r3,-920(fp)
- mull3 r3,-924(fp),-916(fp)
- mull2 r0,-924(fp)
- addl3 -912(fp),-916(fp),r0
- bicl3 #0,r0,-912(fp)
- cmpl -912(fp),-916(fp)
- bgequ noname.297
- addl2 #65536,-924(fp)
-noname.297:
- movzwl -910(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-924(fp)
- bicl3 #-65536,-912(fp),r0
- ashl #16,r0,-916(fp)
- addl3 -916(fp),-920(fp),r0
- bicl3 #0,r0,-920(fp)
- cmpl -920(fp),-916(fp)
- bgequ noname.298
- incl -924(fp)
-noname.298:
- movl -920(fp),r1
- movl -924(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.299
- incl r2
-noname.299:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.300
- incl r8
-noname.300:
-
- movl r10,56(r11)
-
- movl r9,60(r11)
-
- ret
-
-
-
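-; The 4-word variant of the same comba multiply; the C equivalent is
-;
-; void bn_mul_comba4(ULONG r[8], ULONG a[4], ULONG b[4]);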
-;r=4 ;(AP)
-;a=8 ;(AP)
-;b=12 ;(AP)
-;n=16 ;(AP) n by value (input)
-
- .psect code,nowrt
-
-.entry BN_MUL_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
- movab -156(sp),sp
-
- clrq r9
-
- clrl r8
-
- movl 8(ap),r6
- bicl3 #-65536,(r6),r3
- movzwl 2(r6),r2
- bicl2 #-65536,r2
- movl 12(ap),r7
- bicl3 #-65536,(r7),r1
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r2,r4
- mull3 r0,r5,-4(fp)
- mull2 r1,r5
- mull3 r1,r4,-8(fp)
- mull2 r0,r4
- addl3 -4(fp),-8(fp),r0
- bicl3 #0,r0,-4(fp)
- cmpl -4(fp),-8(fp)
- bgequ noname.303
- addl2 #65536,r4
-noname.303:
- movzwl -2(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-4(fp),r0
- ashl #16,r0,-8(fp)
- addl2 -8(fp),r5
- bicl2 #0,r5
- cmpl r5,-8(fp)
- bgequ noname.304
- incl r4
-noname.304:
- movl r5,r1
- movl r4,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.305
- incl r2
-noname.305:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.306
- incl r8
-noname.306:
-
- movl 4(ap),r11
- movl r10,(r11)
-
- clrl r10
-
- bicl3 #-65536,(r6),r3
- movzwl 2(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r7),r2
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-12(fp)
- mull2 r2,r5
- mull3 r2,r4,-16(fp)
- mull2 r0,r4
- addl3 -12(fp),-16(fp),r0
- bicl3 #0,r0,-12(fp)
- cmpl -12(fp),-16(fp)
- bgequ noname.307
- addl2 #65536,r4
-noname.307:
- movzwl -10(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-12(fp),r0
- ashl #16,r0,-16(fp)
- addl2 -16(fp),r5
- bicl2 #0,r5
- cmpl r5,-16(fp)
- bgequ noname.308
- incl r4
-noname.308:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.309
- incl r2
-noname.309:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.310
- incl r10
-noname.310:
-
- bicl3 #-65536,4(r6),r3
- movzwl 6(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r7),r2
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-20(fp)
- mull2 r2,r5
- mull3 r2,r4,-24(fp)
- mull2 r0,r4
- addl3 -20(fp),-24(fp),r0
- bicl3 #0,r0,-20(fp)
- cmpl -20(fp),-24(fp)
- bgequ noname.311
- addl2 #65536,r4
-noname.311:
- movzwl -18(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-20(fp),r0
- ashl #16,r0,-24(fp)
- addl2 -24(fp),r5
- bicl2 #0,r5
- cmpl r5,-24(fp)
- bgequ noname.312
- incl r4
-noname.312:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.313
- incl r2
-noname.313:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.314
- incl r10
-noname.314:
-
- movl r9,4(r11)
-
- clrl r9
-
- bicl3 #-65536,8(r6),r3
- movzwl 10(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r7),r2
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-28(fp)
- mull2 r2,r5
- mull3 r2,r4,-32(fp)
- mull2 r0,r4
- addl3 -28(fp),-32(fp),r0
- bicl3 #0,r0,-28(fp)
- cmpl -28(fp),-32(fp)
- bgequ noname.315
- addl2 #65536,r4
-noname.315:
- movzwl -26(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-28(fp),r0
- ashl #16,r0,-32(fp)
- addl2 -32(fp),r5
- bicl2 #0,r5
- cmpl r5,-32(fp)
- bgequ noname.316
- incl r4
-noname.316:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.317
- incl r2
-noname.317:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.318
- incl r9
-noname.318:
-
- bicl3 #-65536,4(r6),r3
- movzwl 6(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r7),r2
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-36(fp)
- mull2 r2,r5
- mull3 r2,r4,-40(fp)
- mull2 r0,r4
- addl3 -36(fp),-40(fp),r0
- bicl3 #0,r0,-36(fp)
- cmpl -36(fp),-40(fp)
- bgequ noname.319
- addl2 #65536,r4
-noname.319:
- movzwl -34(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-36(fp),r0
- ashl #16,r0,-40(fp)
- addl2 -40(fp),r5
- bicl2 #0,r5
- cmpl r5,-40(fp)
- bgequ noname.320
- incl r4
-noname.320:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.321
- incl r2
-noname.321:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.322
- incl r9
-noname.322:
-
- bicl3 #-65536,(r6),r3
- movzwl 2(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r7),r2
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-44(fp)
- mull2 r2,r5
- mull3 r2,r4,-48(fp)
- mull2 r0,r4
- addl3 -44(fp),-48(fp),r0
- bicl3 #0,r0,-44(fp)
- cmpl -44(fp),-48(fp)
- bgequ noname.323
- addl2 #65536,r4
-noname.323:
- movzwl -42(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-44(fp),r0
- ashl #16,r0,-48(fp)
- addl2 -48(fp),r5
- bicl2 #0,r5
- cmpl r5,-48(fp)
- bgequ noname.324
- incl r4
-noname.324:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.325
- incl r2
-noname.325:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.326
- incl r9
-noname.326:
-
- movl r8,8(r11)
-
- clrl r8
-
- bicl3 #-65536,(r6),r3
- movzwl 2(r6),r2
- bicl3 #-65536,12(r7),r1
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- movl r3,r4
- bicl3 #-65536,r2,-60(fp)
- mull3 r0,r4,-52(fp)
- mull2 r1,r4
- mull3 r1,-60(fp),-56(fp)
- mull2 r0,-60(fp)
- addl3 -52(fp),-56(fp),r0
- bicl3 #0,r0,-52(fp)
- cmpl -52(fp),-56(fp)
- bgequ noname.327
- addl2 #65536,-60(fp)
-noname.327:
- movzwl -50(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-60(fp)
- bicl3 #-65536,-52(fp),r0
- ashl #16,r0,-56(fp)
- addl2 -56(fp),r4
- bicl2 #0,r4
- cmpl r4,-56(fp)
- bgequ noname.328
- incl -60(fp)
-noname.328:
- movl r4,r1
- movl -60(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.329
- incl r2
-noname.329:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.330
- incl r8
-noname.330:
-
- movzwl 6(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r6),-72(fp)
- bicl3 #-65536,r2,-76(fp)
- mull3 r0,-72(fp),-64(fp)
- mull2 r3,-72(fp)
- mull3 r3,-76(fp),-68(fp)
- mull2 r0,-76(fp)
- addl3 -64(fp),-68(fp),r0
- bicl3 #0,r0,-64(fp)
- cmpl -64(fp),-68(fp)
- bgequ noname.331
- addl2 #65536,-76(fp)
-noname.331:
- movzwl -62(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-76(fp)
- bicl3 #-65536,-64(fp),r0
- ashl #16,r0,-68(fp)
- addl3 -68(fp),-72(fp),r0
- bicl3 #0,r0,-72(fp)
- cmpl -72(fp),-68(fp)
- bgequ noname.332
- incl -76(fp)
-noname.332:
- movl -72(fp),r1
- movl -76(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.333
- incl r2
-noname.333:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.334
- incl r8
-noname.334:
-
- bicl3 #-65536,8(r6),r3
- movzwl 10(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r7),r2
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-80(fp)
- mull2 r2,r5
- mull3 r2,r4,-84(fp)
- mull2 r0,r4
- addl3 -80(fp),-84(fp),r0
- bicl3 #0,r0,-80(fp)
- cmpl -80(fp),-84(fp)
- bgequ noname.335
- addl2 #65536,r4
-noname.335:
- movzwl -78(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-80(fp),r0
- ashl #16,r0,-84(fp)
- addl2 -84(fp),r5
- bicl2 #0,r5
- cmpl r5,-84(fp)
- bgequ noname.336
- incl r4
-noname.336:
- movl r5,r1
- movl r4,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.337
- incl r2
-noname.337:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.338
- incl r8
-noname.338:
-
- bicl3 #-65536,12(r6),r3
- movzwl 14(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r7),r2
- movzwl 2(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-88(fp)
- mull2 r2,r5
- mull3 r2,r4,-92(fp)
- mull2 r0,r4
- addl3 -88(fp),-92(fp),r0
- bicl3 #0,r0,-88(fp)
- cmpl -88(fp),-92(fp)
- bgequ noname.339
- addl2 #65536,r4
-noname.339:
- movzwl -86(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-88(fp),r0
- ashl #16,r0,-92(fp)
- addl2 -92(fp),r5
- bicl2 #0,r5
- cmpl r5,-92(fp)
- bgequ noname.340
- incl r4
-noname.340:
- movl r5,r1
- movl r4,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.341
- incl r2
-noname.341:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.342
- incl r8
-noname.342:
-
- movl r10,12(r11)
-
- clrl r10
-
- bicl3 #-65536,12(r6),r3
- movzwl 14(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r7),r2
- movzwl 6(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-96(fp)
- mull2 r2,r5
- mull3 r2,r4,-100(fp)
- mull2 r0,r4
- addl3 -96(fp),-100(fp),r0
- bicl3 #0,r0,-96(fp)
- cmpl -96(fp),-100(fp)
- bgequ noname.343
- addl2 #65536,r4
-noname.343:
- movzwl -94(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-96(fp),r0
- ashl #16,r0,-100(fp)
- addl2 -100(fp),r5
- bicl2 #0,r5
- cmpl r5,-100(fp)
- bgequ noname.344
- incl r4
-noname.344:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.345
- incl r2
-noname.345:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.346
- incl r10
-noname.346:
-
- bicl3 #-65536,8(r6),r3
- movzwl 10(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r7),r2
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-104(fp)
- mull2 r2,r5
- mull3 r2,r4,-108(fp)
- mull2 r0,r4
- addl3 -104(fp),-108(fp),r0
- bicl3 #0,r0,-104(fp)
- cmpl -104(fp),-108(fp)
- bgequ noname.347
- addl2 #65536,r4
-noname.347:
- movzwl -102(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-104(fp),r0
- ashl #16,r0,-108(fp)
- addl2 -108(fp),r5
- bicl2 #0,r5
- cmpl r5,-108(fp)
- bgequ noname.348
- incl r4
-noname.348:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.349
- incl r2
-noname.349:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.350
- incl r10
-noname.350:
-
- bicl3 #-65536,4(r6),r3
- movzwl 6(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r7),r2
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-112(fp)
- mull2 r2,r5
- mull3 r2,r4,-116(fp)
- mull2 r0,r4
- addl3 -112(fp),-116(fp),r0
- bicl3 #0,r0,-112(fp)
- cmpl -112(fp),-116(fp)
- bgequ noname.351
- addl2 #65536,r4
-noname.351:
- movzwl -110(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-112(fp),r0
- ashl #16,r0,-116(fp)
- addl2 -116(fp),r5
- bicl2 #0,r5
- cmpl r5,-116(fp)
- bgequ noname.352
- incl r4
-noname.352:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.353
- incl r2
-noname.353:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.354
- incl r10
-noname.354:
-
- movl r9,16(r11)
-
- clrl r9
-
- bicl3 #-65536,8(r6),r3
- movzwl 10(r6),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r7),r2
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-120(fp)
- mull2 r2,r5
- mull3 r2,r4,-124(fp)
- mull2 r0,r4
- addl3 -120(fp),-124(fp),r0
- bicl3 #0,r0,-120(fp)
- cmpl -120(fp),-124(fp)
- bgequ noname.355
- addl2 #65536,r4
-noname.355:
- movzwl -118(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-120(fp),r0
- ashl #16,r0,-124(fp)
- addl2 -124(fp),r5
- bicl2 #0,r5
- cmpl r5,-124(fp)
- bgequ noname.356
- incl r4
-noname.356:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.357
- incl r2
-noname.357:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.358
- incl r9
-noname.358:
-
- movzwl 14(r6),r2
- bicl3 #-65536,8(r7),r3
- movzwl 10(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-136(fp)
- bicl3 #-65536,r2,-140(fp)
- mull3 r0,-136(fp),-128(fp)
- mull2 r3,-136(fp)
- mull3 r3,-140(fp),-132(fp)
- mull2 r0,-140(fp)
- addl3 -128(fp),-132(fp),r0
- bicl3 #0,r0,-128(fp)
- cmpl -128(fp),-132(fp)
- bgequ noname.359
- addl2 #65536,-140(fp)
-noname.359:
- movzwl -126(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-140(fp)
- bicl3 #-65536,-128(fp),r0
- ashl #16,r0,-132(fp)
- addl3 -132(fp),-136(fp),r0
- bicl3 #0,r0,-136(fp)
- cmpl -136(fp),-132(fp)
- bgequ noname.360
- incl -140(fp)
-noname.360:
- movl -136(fp),r1
- movl -140(fp),r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.361
- incl r2
-noname.361:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.362
- incl r9
-noname.362:
-
- movl r8,20(r11)
-
- clrl r8
-
- movzwl 14(r6),r2
- bicl3 #-65536,12(r7),r3
- movzwl 14(r7),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r6),-152(fp)
- bicl3 #-65536,r2,-156(fp)
- mull3 r0,-152(fp),-144(fp)
- mull2 r3,-152(fp)
- mull3 r3,-156(fp),-148(fp)
- mull2 r0,-156(fp)
- addl3 -144(fp),-148(fp),r0
- bicl3 #0,r0,-144(fp)
- cmpl -144(fp),-148(fp)
- bgequ noname.363
- addl2 #65536,-156(fp)
-noname.363:
- movzwl -142(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-156(fp)
- bicl3 #-65536,-144(fp),r0
- ashl #16,r0,-148(fp)
- addl3 -148(fp),-152(fp),r0
- bicl3 #0,r0,-152(fp)
- cmpl -152(fp),-148(fp)
- bgequ noname.364
- incl -156(fp)
-noname.364:
- movl -152(fp),r1
- movl -156(fp),r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.365
- incl r2
-noname.365:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.366
- incl r8
-noname.366:
-
- movl r10,24(r11)
-
- movl r9,28(r11)
-
- ret
-
-
-
-;r=4 ;(AP)
-;a=8 ;(AP)
-;b=12 ;(AP)
-;n=16 ;(AP) n by value (input)
-
- .psect code,nowrt
-
-.entry BN_SQR_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9>
- movab -444(sp),sp
-
- clrq r8
-
- clrl r7
-
- movl 8(ap),r4
- movl (r4),r3
- bicl3 #-65536,r3,-4(fp)
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- movl -4(fp),r0
- mull3 r0,r3,-8(fp)
- mull3 r0,r0,-4(fp)
- mull2 r3,r3
- bicl3 #32767,-8(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl3 #-65536,-8(fp),r0
- ashl #17,r0,-8(fp)
- addl3 -4(fp),-8(fp),r0
- bicl3 #0,r0,-4(fp)
- cmpl -4(fp),-8(fp)
- bgequ noname.369
- incl r3
-noname.369:
- movl -4(fp),r1
- movl r3,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.370
- incl r2
-noname.370:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.371
- incl r7
-noname.371:
-
- movl r9,@4(ap)
-
- clrl r9
-
- movzwl 6(r4),r2
- bicl3 #-65536,(r4),r3
- movzwl 2(r4),r0
- bicl2 #-65536,r0
- bicl3 #-65536,4(r4),-20(fp)
- bicl3 #-65536,r2,-24(fp)
- mull3 r0,-20(fp),-12(fp)
- mull2 r3,-20(fp)
- mull3 r3,-24(fp),-16(fp)
- mull2 r0,-24(fp)
- addl3 -12(fp),-16(fp),r0
- bicl3 #0,r0,-12(fp)
- cmpl -12(fp),-16(fp)
- bgequ noname.372
- addl2 #65536,-24(fp)
-noname.372:
- movzwl -10(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-24(fp)
- bicl3 #-65536,-12(fp),r0
- ashl #16,r0,-16(fp)
- addl3 -16(fp),-20(fp),r0
- bicl3 #0,r0,-20(fp)
- cmpl -20(fp),-16(fp)
- bgequ noname.373
- incl -24(fp)
-noname.373:
- movl -20(fp),r3
- movl -24(fp),r2
- bbc #31,r2,noname.374
- incl r9
-noname.374:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.375
- incl r2
-noname.375:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.376
- incl r2
- bicl3 #0,r2,r0
- bneq noname.376
- incl r9
-noname.376:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.377
- incl r9
-noname.377:
-
- movl 4(ap),r0
- movl r8,4(r0)
-
- clrl r8
-
- movl 8(ap),r4
- movl 4(r4),r3
- bicl3 #-65536,r3,-28(fp)
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- movl -28(fp),r0
- mull3 r0,r3,-32(fp)
- mull3 r0,r0,-28(fp)
- mull2 r3,r3
- bicl3 #32767,-32(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl3 #-65536,-32(fp),r0
- ashl #17,r0,-32(fp)
- addl3 -28(fp),-32(fp),r0
- bicl3 #0,r0,-28(fp)
- cmpl -28(fp),-32(fp)
- bgequ noname.378
- incl r3
-noname.378:
- movl -28(fp),r1
- movl r3,r2
- addl2 r1,r7
- bicl2 #0,r7
- cmpl r7,r1
- bgequ noname.379
- incl r2
-noname.379:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.380
- incl r8
-noname.380:
-
- movzwl 10(r4),r2
- bicl3 #-65536,(r4),r3
- movzwl 2(r4),r0
- bicl2 #-65536,r0
- bicl3 #-65536,8(r4),-44(fp)
- bicl3 #-65536,r2,-48(fp)
- mull3 r0,-44(fp),-36(fp)
- mull2 r3,-44(fp)
- mull3 r3,-48(fp),-40(fp)
- mull2 r0,-48(fp)
- addl3 -36(fp),-40(fp),r0
- bicl3 #0,r0,-36(fp)
- cmpl -36(fp),-40(fp)
- bgequ noname.381
- addl2 #65536,-48(fp)
-noname.381:
- movzwl -34(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-48(fp)
- bicl3 #-65536,-36(fp),r0
- ashl #16,r0,-40(fp)
- addl3 -40(fp),-44(fp),r0
- bicl3 #0,r0,-44(fp)
- cmpl -44(fp),-40(fp)
- bgequ noname.382
- incl -48(fp)
-noname.382:
- movl -44(fp),r3
- movl -48(fp),r2
- bbc #31,r2,noname.383
- incl r8
-noname.383:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.384
- incl r2
-noname.384:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.385
- incl r2
- bicl3 #0,r2,r0
- bneq noname.385
- incl r8
-noname.385:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.386
- incl r8
-noname.386:
-
- movl 4(ap),r0
- movl r7,8(r0)
-
- clrl r7
-
- movl 8(ap),r0
- movzwl 14(r0),r2
- bicl3 #-65536,(r0),r3
- movzwl 2(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r0),-60(fp)
- bicl3 #-65536,r2,-64(fp)
- mull3 r1,-60(fp),-52(fp)
- mull2 r3,-60(fp)
- mull3 r3,-64(fp),-56(fp)
- mull2 r1,-64(fp)
- addl3 -52(fp),-56(fp),r0
- bicl3 #0,r0,-52(fp)
- cmpl -52(fp),-56(fp)
- bgequ noname.387
- addl2 #65536,-64(fp)
-noname.387:
- movzwl -50(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-64(fp)
- bicl3 #-65536,-52(fp),r0
- ashl #16,r0,-56(fp)
- addl3 -56(fp),-60(fp),r0
- bicl3 #0,r0,-60(fp)
- cmpl -60(fp),-56(fp)
- bgequ noname.388
- incl -64(fp)
-noname.388:
- movl -60(fp),r3
- movl -64(fp),r2
- bbc #31,r2,noname.389
- incl r7
-noname.389:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.390
- incl r2
-noname.390:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.391
- incl r2
- bicl3 #0,r2,r0
- bneq noname.391
- incl r7
-noname.391:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.392
- incl r7
-noname.392:
-
- movl 8(ap),r0
- movzwl 10(r0),r2
- bicl3 #-65536,4(r0),r3
- movzwl 6(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r0),-76(fp)
- bicl3 #-65536,r2,-80(fp)
- mull3 r1,-76(fp),-68(fp)
- mull2 r3,-76(fp)
- mull3 r3,-80(fp),-72(fp)
- mull2 r1,-80(fp)
- addl3 -68(fp),-72(fp),r0
- bicl3 #0,r0,-68(fp)
- cmpl -68(fp),-72(fp)
- bgequ noname.393
- addl2 #65536,-80(fp)
-noname.393:
- movzwl -66(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-80(fp)
- bicl3 #-65536,-68(fp),r0
- ashl #16,r0,-72(fp)
- addl3 -72(fp),-76(fp),r0
- bicl3 #0,r0,-76(fp)
- cmpl -76(fp),-72(fp)
- bgequ noname.394
- incl -80(fp)
-noname.394:
- movl -76(fp),r3
- movl -80(fp),r2
- bbc #31,r2,noname.395
- incl r7
-noname.395:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.396
- incl r2
-noname.396:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.397
- incl r2
- bicl3 #0,r2,r0
- bneq noname.397
- incl r7
-noname.397:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.398
- incl r7
-noname.398:
-
- movl 4(ap),r0
- movl r9,12(r0)
-
- clrl r9
-
- movl 8(ap),r2
- movl 8(r2),r4
- bicl3 #-65536,r4,-84(fp)
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- movl -84(fp),r0
- mull3 r0,r4,-88(fp)
- mull3 r0,r0,-84(fp)
- mull2 r4,r4
- bicl3 #32767,-88(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-88(fp),r0
- ashl #17,r0,-88(fp)
- addl3 -84(fp),-88(fp),r0
- bicl3 #0,r0,-84(fp)
- cmpl -84(fp),-88(fp)
- bgequ noname.399
- incl r4
-noname.399:
- movl -84(fp),r1
- movl r4,r3
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.400
- incl r3
-noname.400:
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.401
- incl r9
-noname.401:
-
- movzwl 14(r2),r3
- bicl3 #-65536,4(r2),r1
- movzwl 6(r2),r0
- bicl2 #-65536,r0
- bicl3 #-65536,12(r2),-100(fp)
- bicl3 #-65536,r3,-104(fp)
- mull3 r0,-100(fp),-92(fp)
- mull2 r1,-100(fp)
- mull3 r1,-104(fp),-96(fp)
- mull2 r0,-104(fp)
- addl3 -92(fp),-96(fp),r0
- bicl3 #0,r0,-92(fp)
- cmpl -92(fp),-96(fp)
- bgequ noname.402
- addl2 #65536,-104(fp)
-noname.402:
- movzwl -90(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-104(fp)
- bicl3 #-65536,-92(fp),r0
- ashl #16,r0,-96(fp)
- addl3 -96(fp),-100(fp),r0
- bicl3 #0,r0,-100(fp)
- cmpl -100(fp),-96(fp)
- bgequ noname.403
- incl -104(fp)
-noname.403:
- movl -100(fp),r3
- movl -104(fp),r2
- bbc #31,r2,noname.404
- incl r9
-noname.404:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.405
- incl r2
-noname.405:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.406
- incl r2
- bicl3 #0,r2,r0
- bneq noname.406
- incl r9
-noname.406:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.407
- incl r9
-noname.407:
-
- movl 8(ap),r0
- movzwl 18(r0),r2
- bicl3 #-65536,(r0),r3
- movzwl 2(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r0),-116(fp)
- bicl3 #-65536,r2,-120(fp)
- mull3 r1,-116(fp),-108(fp)
- mull2 r3,-116(fp)
- mull3 r3,-120(fp),-112(fp)
- mull2 r1,-120(fp)
- addl3 -108(fp),-112(fp),r0
- bicl3 #0,r0,-108(fp)
- cmpl -108(fp),-112(fp)
- bgequ noname.408
- addl2 #65536,-120(fp)
-noname.408:
- movzwl -106(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-120(fp)
- bicl3 #-65536,-108(fp),r0
- ashl #16,r0,-112(fp)
- addl3 -112(fp),-116(fp),r0
- bicl3 #0,r0,-116(fp)
- cmpl -116(fp),-112(fp)
- bgequ noname.409
- incl -120(fp)
-noname.409:
- movl -116(fp),r3
- movl -120(fp),r2
- bbc #31,r2,noname.410
- incl r9
-noname.410:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.411
- incl r2
-noname.411:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.412
- incl r2
- bicl3 #0,r2,r0
- bneq noname.412
- incl r9
-noname.412:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.413
- incl r9
-noname.413:
-
- movl 4(ap),r0
- movl r8,16(r0)
-
- clrl r8
-
- movl 8(ap),r0
- movzwl 22(r0),r2
- bicl3 #-65536,(r0),r3
- movzwl 2(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r0),-132(fp)
- bicl3 #-65536,r2,-136(fp)
- mull3 r1,-132(fp),-124(fp)
- mull2 r3,-132(fp)
- mull3 r3,-136(fp),-128(fp)
- mull2 r1,-136(fp)
- addl3 -124(fp),-128(fp),r0
- bicl3 #0,r0,-124(fp)
- cmpl -124(fp),-128(fp)
- bgequ noname.414
- addl2 #65536,-136(fp)
-noname.414:
- movzwl -122(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-136(fp)
- bicl3 #-65536,-124(fp),r0
- ashl #16,r0,-128(fp)
- addl3 -128(fp),-132(fp),r0
- bicl3 #0,r0,-132(fp)
- cmpl -132(fp),-128(fp)
- bgequ noname.415
- incl -136(fp)
-noname.415:
- movl -132(fp),r3
- movl -136(fp),r2
- bbc #31,r2,noname.416
- incl r8
-noname.416:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.417
- incl r2
-noname.417:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.418
- incl r2
- bicl3 #0,r2,r0
- bneq noname.418
- incl r8
-noname.418:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.419
- incl r8
-noname.419:
-
- movl 8(ap),r0
- movzwl 18(r0),r2
- bicl3 #-65536,4(r0),r3
- movzwl 6(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r0),-148(fp)
- bicl3 #-65536,r2,-152(fp)
- mull3 r1,-148(fp),-140(fp)
- mull2 r3,-148(fp)
- mull3 r3,-152(fp),-144(fp)
- mull2 r1,-152(fp)
- addl3 -140(fp),-144(fp),r0
- bicl3 #0,r0,-140(fp)
- cmpl -140(fp),-144(fp)
- bgequ noname.420
- addl2 #65536,-152(fp)
-noname.420:
- movzwl -138(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-152(fp)
- bicl3 #-65536,-140(fp),r0
- ashl #16,r0,-144(fp)
- addl3 -144(fp),-148(fp),r0
- bicl3 #0,r0,-148(fp)
- cmpl -148(fp),-144(fp)
- bgequ noname.421
- incl -152(fp)
-noname.421:
- movl -148(fp),r3
- movl -152(fp),r2
- bbc #31,r2,noname.422
- incl r8
-noname.422:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.423
- incl r2
-noname.423:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.424
- incl r2
- bicl3 #0,r2,r0
- bneq noname.424
- incl r8
-noname.424:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.425
- incl r8
-noname.425:
-
- movl 8(ap),r0
- movzwl 14(r0),r2
- bicl3 #-65536,8(r0),r3
- movzwl 10(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r0),-164(fp)
- bicl3 #-65536,r2,-168(fp)
- mull3 r1,-164(fp),-156(fp)
- mull2 r3,-164(fp)
- mull3 r3,-168(fp),-160(fp)
- mull2 r1,-168(fp)
- addl3 -156(fp),-160(fp),r0
- bicl3 #0,r0,-156(fp)
- cmpl -156(fp),-160(fp)
- bgequ noname.426
- addl2 #65536,-168(fp)
-noname.426:
- movzwl -154(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-168(fp)
- bicl3 #-65536,-156(fp),r0
- ashl #16,r0,-160(fp)
- addl3 -160(fp),-164(fp),r0
- bicl3 #0,r0,-164(fp)
- cmpl -164(fp),-160(fp)
- bgequ noname.427
- incl -168(fp)
-noname.427:
- movl -164(fp),r3
- movl -168(fp),r2
- bbc #31,r2,noname.428
- incl r8
-noname.428:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.429
- incl r2
-noname.429:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.430
- incl r2
- bicl3 #0,r2,r0
- bneq noname.430
- incl r8
-noname.430:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.431
- incl r8
-noname.431:
-
- movl 4(ap),r0
- movl r7,20(r0)
-
- clrl r7
-
- movl 8(ap),r2
- movl 12(r2),r4
- bicl3 #-65536,r4,-172(fp)
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- movl -172(fp),r0
- mull3 r0,r4,-176(fp)
- mull3 r0,r0,-172(fp)
- mull2 r4,r4
- bicl3 #32767,-176(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-176(fp),r0
- ashl #17,r0,-176(fp)
- addl3 -172(fp),-176(fp),r0
- bicl3 #0,r0,-172(fp)
- cmpl -172(fp),-176(fp)
- bgequ noname.432
- incl r4
-noname.432:
- movl -172(fp),r1
- movl r4,r3
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.433
- incl r3
-noname.433:
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.434
- incl r7
-noname.434:
-
- movzwl 18(r2),r3
- bicl3 #-65536,8(r2),r1
- movzwl 10(r2),r0
- bicl2 #-65536,r0
- bicl3 #-65536,16(r2),-188(fp)
- bicl3 #-65536,r3,-192(fp)
- mull3 r0,-188(fp),-180(fp)
- mull2 r1,-188(fp)
- mull3 r1,-192(fp),-184(fp)
- mull2 r0,-192(fp)
- addl3 -180(fp),-184(fp),r0
- bicl3 #0,r0,-180(fp)
- cmpl -180(fp),-184(fp)
- bgequ noname.435
- addl2 #65536,-192(fp)
-noname.435:
- movzwl -178(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-192(fp)
- bicl3 #-65536,-180(fp),r0
- ashl #16,r0,-184(fp)
- addl3 -184(fp),-188(fp),r0
- bicl3 #0,r0,-188(fp)
- cmpl -188(fp),-184(fp)
- bgequ noname.436
- incl -192(fp)
-noname.436:
- movl -188(fp),r3
- movl -192(fp),r2
- bbc #31,r2,noname.437
- incl r7
-noname.437:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.438
- incl r2
-noname.438:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.439
- incl r2
- bicl3 #0,r2,r0
- bneq noname.439
- incl r7
-noname.439:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.440
- incl r7
-noname.440:
-
- movl 8(ap),r0
- movzwl 22(r0),r2
- bicl3 #-65536,4(r0),r3
- movzwl 6(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r0),-204(fp)
- bicl3 #-65536,r2,-208(fp)
- mull3 r1,-204(fp),-196(fp)
- mull2 r3,-204(fp)
- mull3 r3,-208(fp),-200(fp)
- mull2 r1,-208(fp)
- addl3 -196(fp),-200(fp),r0
- bicl3 #0,r0,-196(fp)
- cmpl -196(fp),-200(fp)
- bgequ noname.441
- addl2 #65536,-208(fp)
-noname.441:
- movzwl -194(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-208(fp)
- bicl3 #-65536,-196(fp),r0
- ashl #16,r0,-200(fp)
- addl3 -200(fp),-204(fp),r0
- bicl3 #0,r0,-204(fp)
- cmpl -204(fp),-200(fp)
- bgequ noname.442
- incl -208(fp)
-noname.442:
- movl -204(fp),r3
- movl -208(fp),r2
- bbc #31,r2,noname.443
- incl r7
-noname.443:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.444
- incl r2
-noname.444:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.445
- incl r2
- bicl3 #0,r2,r0
- bneq noname.445
- incl r7
-noname.445:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.446
- incl r7
-noname.446:
-
- movl 8(ap),r0
- movzwl 26(r0),r2
- bicl3 #-65536,(r0),r3
- movzwl 2(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,24(r0),-220(fp)
- bicl3 #-65536,r2,-224(fp)
- mull3 r1,-220(fp),-212(fp)
- mull2 r3,-220(fp)
- mull3 r3,-224(fp),-216(fp)
- mull2 r1,-224(fp)
- addl3 -212(fp),-216(fp),r0
- bicl3 #0,r0,-212(fp)
- cmpl -212(fp),-216(fp)
- bgequ noname.447
- addl2 #65536,-224(fp)
-noname.447:
- movzwl -210(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-224(fp)
- bicl3 #-65536,-212(fp),r0
- ashl #16,r0,-216(fp)
- addl3 -216(fp),-220(fp),r0
- bicl3 #0,r0,-220(fp)
- cmpl -220(fp),-216(fp)
- bgequ noname.448
- incl -224(fp)
-noname.448:
- movl -220(fp),r3
- movl -224(fp),r2
- bbc #31,r2,noname.449
- incl r7
-noname.449:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.450
- incl r2
-noname.450:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.451
- incl r2
- bicl3 #0,r2,r0
- bneq noname.451
- incl r7
-noname.451:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.452
- incl r7
-noname.452:
-
- movl 4(ap),r0
- movl r9,24(r0)
-
- clrl r9
-
- movl 8(ap),r0
- movzwl 30(r0),r2
- bicl3 #-65536,(r0),r3
- movzwl 2(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,28(r0),-236(fp)
- bicl3 #-65536,r2,-240(fp)
- mull3 r1,-236(fp),-228(fp)
- mull2 r3,-236(fp)
- mull3 r3,-240(fp),-232(fp)
- mull2 r1,-240(fp)
- addl3 -228(fp),-232(fp),r0
- bicl3 #0,r0,-228(fp)
- cmpl -228(fp),-232(fp)
- bgequ noname.453
- addl2 #65536,-240(fp)
-noname.453:
- movzwl -226(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-240(fp)
- bicl3 #-65536,-228(fp),r0
- ashl #16,r0,-232(fp)
- addl3 -232(fp),-236(fp),r0
- bicl3 #0,r0,-236(fp)
- cmpl -236(fp),-232(fp)
- bgequ noname.454
- incl -240(fp)
-noname.454:
- movl -236(fp),r3
- movl -240(fp),r2
- bbc #31,r2,noname.455
- incl r9
-noname.455:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.456
- incl r2
-noname.456:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.457
- incl r2
- bicl3 #0,r2,r0
- bneq noname.457
- incl r9
-noname.457:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.458
- incl r9
-noname.458:
-
- movl 8(ap),r0
- movzwl 26(r0),r2
- bicl3 #-65536,4(r0),r3
- movzwl 6(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,24(r0),-252(fp)
- bicl3 #-65536,r2,-256(fp)
- mull3 r1,-252(fp),-244(fp)
- mull2 r3,-252(fp)
- mull3 r3,-256(fp),-248(fp)
- mull2 r1,-256(fp)
- addl3 -244(fp),-248(fp),r0
- bicl3 #0,r0,-244(fp)
- cmpl -244(fp),-248(fp)
- bgequ noname.459
- addl2 #65536,-256(fp)
-noname.459:
- movzwl -242(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-256(fp)
- bicl3 #-65536,-244(fp),r0
- ashl #16,r0,-248(fp)
- addl3 -248(fp),-252(fp),r0
- bicl3 #0,r0,-252(fp)
- cmpl -252(fp),-248(fp)
- bgequ noname.460
- incl -256(fp)
-noname.460:
- movl -252(fp),r3
- movl -256(fp),r2
- bbc #31,r2,noname.461
- incl r9
-noname.461:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.462
- incl r2
-noname.462:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.463
- incl r2
- bicl3 #0,r2,r0
- bneq noname.463
- incl r9
-noname.463:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.464
- incl r9
-noname.464:
-
- movl 8(ap),r0
- movzwl 22(r0),r2
- bicl3 #-65536,8(r0),r3
- movzwl 10(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r0),-268(fp)
- bicl3 #-65536,r2,-272(fp)
- mull3 r1,-268(fp),-260(fp)
- mull2 r3,-268(fp)
- mull3 r3,-272(fp),-264(fp)
- mull2 r1,-272(fp)
- addl3 -260(fp),-264(fp),r0
- bicl3 #0,r0,-260(fp)
- cmpl -260(fp),-264(fp)
- bgequ noname.465
- addl2 #65536,-272(fp)
-noname.465:
- movzwl -258(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-272(fp)
- bicl3 #-65536,-260(fp),r0
- ashl #16,r0,-264(fp)
- addl3 -264(fp),-268(fp),r0
- bicl3 #0,r0,-268(fp)
- cmpl -268(fp),-264(fp)
- bgequ noname.466
- incl -272(fp)
-noname.466:
- movl -268(fp),r3
- movl -272(fp),r2
- bbc #31,r2,noname.467
- incl r9
-noname.467:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.468
- incl r2
-noname.468:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.469
- incl r2
- bicl3 #0,r2,r0
- bneq noname.469
- incl r9
-noname.469:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.470
- incl r9
-noname.470:
-
- movl 8(ap),r0
- movzwl 18(r0),r2
- bicl3 #-65536,12(r0),r3
- movzwl 14(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r0),-284(fp)
- bicl3 #-65536,r2,-288(fp)
- mull3 r1,-284(fp),-276(fp)
- mull2 r3,-284(fp)
- mull3 r3,-288(fp),-280(fp)
- mull2 r1,-288(fp)
- addl3 -276(fp),-280(fp),r0
- bicl3 #0,r0,-276(fp)
- cmpl -276(fp),-280(fp)
- bgequ noname.471
- addl2 #65536,-288(fp)
-noname.471:
- movzwl -274(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-288(fp)
- bicl3 #-65536,-276(fp),r0
- ashl #16,r0,-280(fp)
- addl3 -280(fp),-284(fp),r0
- bicl3 #0,r0,-284(fp)
- cmpl -284(fp),-280(fp)
- bgequ noname.472
- incl -288(fp)
-noname.472:
- movl -284(fp),r3
- movl -288(fp),r2
- bbc #31,r2,noname.473
- incl r9
-noname.473:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.474
- incl r2
-noname.474:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.475
- incl r2
- bicl3 #0,r2,r0
- bneq noname.475
- incl r9
-noname.475:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.476
- incl r9
-noname.476:
-
- movl 4(ap),r0
- movl r8,28(r0)
-
- clrl r8
-
- movl 8(ap),r3
- movl 16(r3),r4
- bicl3 #-65536,r4,r5
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- mull3 r5,r4,-292(fp)
- mull2 r5,r5
- mull2 r4,r4
- bicl3 #32767,-292(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-292(fp),r0
- ashl #17,r0,-292(fp)
- addl2 -292(fp),r5
- bicl2 #0,r5
- cmpl r5,-292(fp)
- bgequ noname.477
- incl r4
-noname.477:
- movl r5,r1
- movl r4,r2
- addl2 r1,r7
- bicl2 #0,r7
- cmpl r7,r1
- bgequ noname.478
- incl r2
-noname.478:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.479
- incl r8
-noname.479:
-
- bicl3 #-65536,20(r3),r4
- movzwl 22(r3),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r3),r2
- movzwl 14(r3),r0
- bicl2 #-65536,r0
- movl r4,r6
- movl r1,r5
- mull3 r0,r6,-296(fp)
- mull2 r2,r6
- mull3 r2,r5,-300(fp)
- mull2 r0,r5
- addl3 -296(fp),-300(fp),r0
- bicl3 #0,r0,-296(fp)
- cmpl -296(fp),-300(fp)
- bgequ noname.480
- addl2 #65536,r5
-noname.480:
- movzwl -294(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r5
- bicl3 #-65536,-296(fp),r0
- ashl #16,r0,-300(fp)
- addl2 -300(fp),r6
- bicl2 #0,r6
- cmpl r6,-300(fp)
- bgequ noname.481
- incl r5
-noname.481:
- movl r6,r3
- movl r5,r2
- bbc #31,r2,noname.482
- incl r8
-noname.482:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.483
- incl r2
-noname.483:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.484
- incl r2
- bicl3 #0,r2,r0
- bneq noname.484
- incl r8
-noname.484:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.485
- incl r8
-noname.485:
-
- movl 8(ap),r0
- bicl3 #-65536,24(r0),r3
- movzwl 26(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r0),r2
- movzwl 10(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-304(fp)
- mull2 r2,r5
- mull3 r2,r4,-308(fp)
- mull2 r0,r4
- addl3 -304(fp),-308(fp),r0
- bicl3 #0,r0,-304(fp)
- cmpl -304(fp),-308(fp)
- bgequ noname.486
- addl2 #65536,r4
-noname.486:
- movzwl -302(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-304(fp),r0
- ashl #16,r0,-308(fp)
- addl2 -308(fp),r5
- bicl2 #0,r5
- cmpl r5,-308(fp)
- bgequ noname.487
- incl r4
-noname.487:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.488
- incl r8
-noname.488:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.489
- incl r2
-noname.489:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.490
- incl r2
- bicl3 #0,r2,r0
- bneq noname.490
- incl r8
-noname.490:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.491
- incl r8
-noname.491:
-
- movl 8(ap),r0
- bicl3 #-65536,28(r0),r3
- movzwl 30(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r0),r2
- movzwl 6(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-312(fp)
- mull2 r2,r5
- mull3 r2,r4,-316(fp)
- mull2 r0,r4
- addl3 -312(fp),-316(fp),r0
- bicl3 #0,r0,-312(fp)
- cmpl -312(fp),-316(fp)
- bgequ noname.492
- addl2 #65536,r4
-noname.492:
- movzwl -310(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-312(fp),r0
- ashl #16,r0,-316(fp)
- addl2 -316(fp),r5
- bicl2 #0,r5
- cmpl r5,-316(fp)
- bgequ noname.493
- incl r4
-noname.493:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.494
- incl r8
-noname.494:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.495
- incl r2
-noname.495:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.496
- incl r2
- bicl3 #0,r2,r0
- bneq noname.496
- incl r8
-noname.496:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.497
- incl r8
-noname.497:
-
- movl 4(ap),r0
- movl r7,32(r0)
-
- clrl r7
-
- movl 8(ap),r0
- bicl3 #-65536,28(r0),r3
- movzwl 30(r0),r2
- bicl3 #-65536,8(r0),r1
- movzwl 10(r0),r0
- bicl2 #-65536,r0
- movl r3,r4
- bicl3 #-65536,r2,-328(fp)
- mull3 r0,r4,-320(fp)
- mull2 r1,r4
- mull3 r1,-328(fp),-324(fp)
- mull2 r0,-328(fp)
- addl3 -320(fp),-324(fp),r0
- bicl3 #0,r0,-320(fp)
- cmpl -320(fp),-324(fp)
- bgequ noname.498
- addl2 #65536,-328(fp)
-noname.498:
- movzwl -318(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-328(fp)
- bicl3 #-65536,-320(fp),r0
- ashl #16,r0,-324(fp)
- addl2 -324(fp),r4
- bicl2 #0,r4
- cmpl r4,-324(fp)
- bgequ noname.499
- incl -328(fp)
-noname.499:
- movl r4,r3
- movl -328(fp),r2
- bbc #31,r2,noname.500
- incl r7
-noname.500:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.501
- incl r2
-noname.501:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.502
- incl r2
- bicl3 #0,r2,r0
- bneq noname.502
- incl r7
-noname.502:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.503
- incl r7
-noname.503:
-
- movl 8(ap),r0
- movzwl 26(r0),r2
- bicl3 #-65536,12(r0),r3
- movzwl 14(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,24(r0),-340(fp)
- bicl3 #-65536,r2,-344(fp)
- mull3 r1,-340(fp),-332(fp)
- mull2 r3,-340(fp)
- mull3 r3,-344(fp),-336(fp)
- mull2 r1,-344(fp)
- addl3 -332(fp),-336(fp),r0
- bicl3 #0,r0,-332(fp)
- cmpl -332(fp),-336(fp)
- bgequ noname.504
- addl2 #65536,-344(fp)
-noname.504:
- movzwl -330(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-344(fp)
- bicl3 #-65536,-332(fp),r0
- ashl #16,r0,-336(fp)
- addl3 -336(fp),-340(fp),r0
- bicl3 #0,r0,-340(fp)
- cmpl -340(fp),-336(fp)
- bgequ noname.505
- incl -344(fp)
-noname.505:
- movl -340(fp),r3
- movl -344(fp),r2
- bbc #31,r2,noname.506
- incl r7
-noname.506:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.507
- incl r2
-noname.507:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.508
- incl r2
- bicl3 #0,r2,r0
- bneq noname.508
- incl r7
-noname.508:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.509
- incl r7
-noname.509:
-
- movl 8(ap),r0
- movzwl 22(r0),r2
- bicl3 #-65536,16(r0),r3
- movzwl 18(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r0),-356(fp)
- bicl3 #-65536,r2,-360(fp)
- mull3 r1,-356(fp),-348(fp)
- mull2 r3,-356(fp)
- mull3 r3,-360(fp),-352(fp)
- mull2 r1,-360(fp)
- addl3 -348(fp),-352(fp),r0
- bicl3 #0,r0,-348(fp)
- cmpl -348(fp),-352(fp)
- bgequ noname.510
- addl2 #65536,-360(fp)
-noname.510:
- movzwl -346(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-360(fp)
- bicl3 #-65536,-348(fp),r0
- ashl #16,r0,-352(fp)
- addl3 -352(fp),-356(fp),r0
- bicl3 #0,r0,-356(fp)
- cmpl -356(fp),-352(fp)
- bgequ noname.511
- incl -360(fp)
-noname.511:
- movl -356(fp),r3
- movl -360(fp),r2
- bbc #31,r2,noname.512
- incl r7
-noname.512:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.513
- incl r2
-noname.513:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.514
- incl r2
- bicl3 #0,r2,r0
- bneq noname.514
- incl r7
-noname.514:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.515
- incl r7
-noname.515:
-
- movl 4(ap),r0
- movl r9,36(r0)
-
- clrl r9
-
- movl 8(ap),r3
- movl 20(r3),r4
- bicl3 #-65536,r4,-364(fp)
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- movl -364(fp),r0
- mull3 r0,r4,-368(fp)
- mull3 r0,r0,-364(fp)
- mull2 r4,r4
- bicl3 #32767,-368(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-368(fp),r0
- ashl #17,r0,-368(fp)
- addl3 -364(fp),-368(fp),r0
- bicl3 #0,r0,-364(fp)
- cmpl -364(fp),-368(fp)
- bgequ noname.516
- incl r4
-noname.516:
- movl -364(fp),r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.517
- incl r2
-noname.517:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.518
- incl r9
-noname.518:
-
- bicl3 #-65536,24(r3),r4
- movzwl 26(r3),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r3),r2
- movzwl 18(r3),r0
- bicl2 #-65536,r0
- movl r4,r6
- movl r1,r5
- mull3 r0,r6,-372(fp)
- mull2 r2,r6
- mull3 r2,r5,-376(fp)
- mull2 r0,r5
- addl3 -372(fp),-376(fp),r0
- bicl3 #0,r0,-372(fp)
- cmpl -372(fp),-376(fp)
- bgequ noname.519
- addl2 #65536,r5
-noname.519:
- movzwl -370(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r5
- bicl3 #-65536,-372(fp),r0
- ashl #16,r0,-376(fp)
- addl2 -376(fp),r6
- bicl2 #0,r6
- cmpl r6,-376(fp)
- bgequ noname.520
- incl r5
-noname.520:
- movl r6,r3
- movl r5,r2
- bbc #31,r2,noname.521
- incl r9
-noname.521:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.522
- incl r2
-noname.522:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.523
- incl r2
- bicl3 #0,r2,r0
- bneq noname.523
- incl r9
-noname.523:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.524
- incl r9
-noname.524:
-
- movl 8(ap),r0
- bicl3 #-65536,28(r0),r3
- movzwl 30(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,12(r0),r2
- movzwl 14(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-380(fp)
- mull2 r2,r5
- mull3 r2,r4,-384(fp)
- mull2 r0,r4
- addl3 -380(fp),-384(fp),r0
- bicl3 #0,r0,-380(fp)
- cmpl -380(fp),-384(fp)
- bgequ noname.525
- addl2 #65536,r4
-noname.525:
- movzwl -378(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-380(fp),r0
- ashl #16,r0,-384(fp)
- addl2 -384(fp),r5
- bicl2 #0,r5
- cmpl r5,-384(fp)
- bgequ noname.526
- incl r4
-noname.526:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.527
- incl r9
-noname.527:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.528
- incl r2
-noname.528:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.529
- incl r2
- bicl3 #0,r2,r0
- bneq noname.529
- incl r9
-noname.529:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.530
- incl r9
-noname.530:
- movl 4(ap),r0
- movl r8,40(r0)
-
- clrl r8
-
- movl 8(ap),r0
- bicl3 #-65536,28(r0),r3
- movzwl 30(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,16(r0),r2
- movzwl 18(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-388(fp)
- mull2 r2,r5
- mull3 r2,r4,-392(fp)
- mull2 r0,r4
- addl3 -388(fp),-392(fp),r0
- bicl3 #0,r0,-388(fp)
- cmpl -388(fp),-392(fp)
- bgequ noname.531
- addl2 #65536,r4
-noname.531:
- movzwl -386(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-388(fp),r0
- ashl #16,r0,-392(fp)
- addl2 -392(fp),r5
- bicl2 #0,r5
- cmpl r5,-392(fp)
- bgequ noname.532
- incl r4
-noname.532:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.533
- incl r8
-noname.533:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.534
- incl r2
-noname.534:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.535
- incl r2
- bicl3 #0,r2,r0
- bneq noname.535
- incl r8
-noname.535:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.536
- incl r8
-noname.536:
-
- movl 8(ap),r0
- bicl3 #-65536,24(r0),r3
- movzwl 26(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,20(r0),r2
- movzwl 22(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-396(fp)
- mull2 r2,r5
- mull3 r2,r4,-400(fp)
- mull2 r0,r4
- addl3 -396(fp),-400(fp),r0
- bicl3 #0,r0,-396(fp)
- cmpl -396(fp),-400(fp)
- bgequ noname.537
- addl2 #65536,r4
-noname.537:
- movzwl -394(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-396(fp),r0
- ashl #16,r0,-400(fp)
- addl2 -400(fp),r5
- bicl2 #0,r5
- cmpl r5,-400(fp)
- bgequ noname.538
- incl r4
-noname.538:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.539
- incl r8
-noname.539:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.540
- incl r2
-noname.540:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r7
- bicl2 #0,r7
- cmpl r7,r3
- bgequ noname.541
- incl r2
- bicl3 #0,r2,r0
- bneq noname.541
- incl r8
-noname.541:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.542
- incl r8
-noname.542:
-
- movl 4(ap),r0
- movl r7,44(r0)
-
- clrl r7
-
- movl 8(ap),r3
- movl 24(r3),r4
- bicl3 #-65536,r4,r5
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- mull3 r5,r4,-404(fp)
- mull2 r5,r5
- mull2 r4,r4
- bicl3 #32767,-404(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-404(fp),r0
- ashl #17,r0,-404(fp)
- addl2 -404(fp),r5
- bicl2 #0,r5
- cmpl r5,-404(fp)
- bgequ noname.543
- incl r4
-noname.543:
- movl r5,r1
- movl r4,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.544
- incl r2
-noname.544:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.545
- incl r7
-noname.545:
-
- movzwl 30(r3),r2
- bicl3 #-65536,20(r3),r1
- movzwl 22(r3),r0
- bicl2 #-65536,r0
- bicl3 #-65536,28(r3),-416(fp)
- bicl3 #-65536,r2,-420(fp)
- mull3 r0,-416(fp),-408(fp)
- mull2 r1,-416(fp)
- mull3 r1,-420(fp),-412(fp)
- mull2 r0,-420(fp)
- addl3 -408(fp),-412(fp),r0
- bicl3 #0,r0,-408(fp)
- cmpl -408(fp),-412(fp)
- bgequ noname.546
- addl2 #65536,-420(fp)
-noname.546:
- movzwl -406(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-420(fp)
- bicl3 #-65536,-408(fp),r0
- ashl #16,r0,-412(fp)
- addl3 -412(fp),-416(fp),r0
- bicl3 #0,r0,-416(fp)
- cmpl -416(fp),-412(fp)
- bgequ noname.547
- incl -420(fp)
-noname.547:
- movl -416(fp),r3
- movl -420(fp),r2
- bbc #31,r2,noname.548
- incl r7
-noname.548:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.549
- incl r2
-noname.549:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.550
- incl r2
- bicl3 #0,r2,r0
- bneq noname.550
- incl r7
-noname.550:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.551
- incl r7
-noname.551:
-
- movl 4(ap),r0
- movl r9,48(r0)
-
- clrl r9
-
- movl 8(ap),r0
- movzwl 30(r0),r2
- bicl3 #-65536,24(r0),r3
- movzwl 26(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,28(r0),-432(fp)
- bicl3 #-65536,r2,-436(fp)
- mull3 r1,-432(fp),-424(fp)
- mull2 r3,-432(fp)
- mull3 r3,-436(fp),-428(fp)
- mull2 r1,-436(fp)
- addl3 -424(fp),-428(fp),r0
- bicl3 #0,r0,-424(fp)
- cmpl -424(fp),-428(fp)
- bgequ noname.552
- addl2 #65536,-436(fp)
-noname.552:
- movzwl -422(fp),r0
- bicl2 #-65536,r0
- addl2 r0,-436(fp)
- bicl3 #-65536,-424(fp),r0
- ashl #16,r0,-428(fp)
- addl3 -428(fp),-432(fp),r0
- bicl3 #0,r0,-432(fp)
- cmpl -432(fp),-428(fp)
- bgequ noname.553
- incl -436(fp)
-noname.553:
- movl -432(fp),r3
- movl -436(fp),r2
- bbc #31,r2,noname.554
- incl r9
-noname.554:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.555
- incl r2
-noname.555:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.556
- incl r2
- bicl3 #0,r2,r0
- bneq noname.556
- incl r9
-noname.556:
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.557
- incl r9
-noname.557:
-
- movl 4(ap),r4
- movl r8,52(r4)
-
- clrl r8
-
- movl 8(ap),r0
- movl 28(r0),r3
- bicl3 #-65536,r3,-440(fp)
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- movl -440(fp),r0
- mull3 r0,r3,-444(fp)
- mull3 r0,r0,-440(fp)
- mull2 r3,r3
- bicl3 #32767,-444(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl3 #-65536,-444(fp),r0
- ashl #17,r0,-444(fp)
- addl3 -440(fp),-444(fp),r0
- bicl3 #0,r0,-440(fp)
- cmpl -440(fp),-444(fp)
- bgequ noname.558
- incl r3
-noname.558:
- movl -440(fp),r1
- movl r3,r2
- addl2 r1,r7
- bicl2 #0,r7
- cmpl r7,r1
- bgequ noname.559
- incl r2
-noname.559:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.560
- incl r8
-noname.560:
-
- movl r7,56(r4)
-
- movl r9,60(r4)
-
- ret
-
-
-
-;r=4 ;(AP)
-;a=8 ;(AP)
-;b=12 ;(AP)
-;n=16 ;(AP) n by value (input)
-
- .psect code,nowrt
-
-.entry BN_SQR_COMBA4,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10>
- subl2 #44,sp
-
- clrq r8
-
- clrl r10
-
- movl 8(ap),r5
- movl (r5),r3
- bicl3 #-65536,r3,r4
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- mull3 r4,r3,-4(fp)
- mull2 r4,r4
- mull2 r3,r3
- bicl3 #32767,-4(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl3 #-65536,-4(fp),r0
- ashl #17,r0,-4(fp)
- addl2 -4(fp),r4
- bicl2 #0,r4
- cmpl r4,-4(fp)
- bgequ noname.563
- incl r3
-noname.563:
- movl r4,r1
- movl r3,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.564
- incl r2
-noname.564:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.565
- incl r10
-noname.565:
-
- movl r9,@4(ap)
-
- clrl r9
-
- bicl3 #-65536,4(r5),r3
- movzwl 6(r5),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r5),r2
- movzwl 2(r5),r0
- bicl2 #-65536,r0
- movl r3,r6
- movl r1,r4
- mull3 r0,r6,-8(fp)
- mull2 r2,r6
- mull2 r4,r2
- mull2 r0,r4
- addl3 -8(fp),r2,r0
- bicl3 #0,r0,-8(fp)
- cmpl -8(fp),r2
- bgequ noname.566
- addl2 #65536,r4
-noname.566:
- movzwl -6(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-8(fp),r0
- ashl #16,r0,r1
- addl2 r1,r6
- bicl2 #0,r6
- cmpl r6,r1
- bgequ noname.567
- incl r4
-noname.567:
- movl r6,r3
- movl r4,r2
- bbc #31,r2,noname.568
- incl r9
-noname.568:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.569
- incl r2
-noname.569:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.570
- incl r2
- bicl3 #0,r2,r0
- bneq noname.570
- incl r9
-noname.570:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.571
- incl r9
-noname.571:
-
- movl 4(ap),r0
- movl r8,4(r0)
-
- clrl r8
-
- movl 8(ap),r4
- movl 4(r4),r3
- bicl3 #-65536,r3,r5
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- mull3 r5,r3,r1
- mull2 r5,r5
- mull2 r3,r3
- bicl3 #32767,r1,r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl2 #-65536,r1
- ashl #17,r1,r1
- addl2 r1,r5
- bicl2 #0,r5
- cmpl r5,r1
- bgequ noname.572
- incl r3
-noname.572:
- movl r5,r1
- movl r3,r2
- addl2 r1,r10
- bicl2 #0,r10
- cmpl r10,r1
- bgequ noname.573
- incl r2
-noname.573:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.574
- incl r8
-noname.574:
-
- bicl3 #-65536,8(r4),r3
- movzwl 10(r4),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r4),r2
- movzwl 2(r4),r0
- bicl2 #-65536,r0
- movl r3,r6
- movl r1,r5
- mull3 r0,r6,r7
- mull2 r2,r6
- mull2 r5,r2
- mull2 r0,r5
- addl2 r2,r7
- bicl2 #0,r7
- cmpl r7,r2
- bgequ noname.575
- addl2 #65536,r5
-noname.575:
- extzv #16,#16,r7,r0
- bicl2 #-65536,r0
- addl2 r0,r5
- bicl3 #-65536,r7,r0
- ashl #16,r0,r1
- addl2 r1,r6
- bicl2 #0,r6
- cmpl r6,r1
- bgequ noname.576
- incl r5
-noname.576:
- movl r6,r3
- movl r5,r2
- bbc #31,r2,noname.577
- incl r8
-noname.577:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.578
- incl r2
-noname.578:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r10
- bicl2 #0,r10
- cmpl r10,r3
- bgequ noname.579
- incl r2
- bicl3 #0,r2,r0
- bneq noname.579
- incl r8
-noname.579:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.580
- incl r8
-noname.580:
-
- movl 4(ap),r0
- movl r10,8(r0)
-
- clrl r10
-
- movl 8(ap),r0
- bicl3 #-65536,12(r0),r3
- movzwl 14(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,(r0),r2
- movzwl 2(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,r6
- mull2 r2,r5
- mull3 r2,r4,-12(fp)
- mull2 r0,r4
- addl2 -12(fp),r6
- bicl2 #0,r6
- cmpl r6,-12(fp)
- bgequ noname.581
- addl2 #65536,r4
-noname.581:
- extzv #16,#16,r6,r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,r6,r0
- ashl #16,r0,-12(fp)
- addl2 -12(fp),r5
- bicl2 #0,r5
- cmpl r5,-12(fp)
- bgequ noname.582
- incl r4
-noname.582:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.583
- incl r10
-noname.583:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.584
- incl r2
-noname.584:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.585
- incl r2
- bicl3 #0,r2,r0
- bneq noname.585
- incl r10
-noname.585:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.586
- incl r10
-noname.586:
-
- movl 8(ap),r0
- bicl3 #-65536,8(r0),r3
- movzwl 10(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r0),r2
- movzwl 6(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-16(fp)
- mull2 r2,r5
- mull3 r2,r4,-20(fp)
- mull2 r0,r4
- addl3 -16(fp),-20(fp),r0
- bicl3 #0,r0,-16(fp)
- cmpl -16(fp),-20(fp)
- bgequ noname.587
- addl2 #65536,r4
-noname.587:
- movzwl -14(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-16(fp),r0
- ashl #16,r0,-20(fp)
- addl2 -20(fp),r5
- bicl2 #0,r5
- cmpl r5,-20(fp)
- bgequ noname.588
- incl r4
-noname.588:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.589
- incl r10
-noname.589:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.590
- incl r2
-noname.590:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r9
- bicl2 #0,r9
- cmpl r9,r3
- bgequ noname.591
- incl r2
- bicl3 #0,r2,r0
- bneq noname.591
- incl r10
-noname.591:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.592
- incl r10
-noname.592:
- movl 4(ap),r0
- movl r9,12(r0)
-
- clrl r9
-
- movl 8(ap),r3
- movl 8(r3),r4
- bicl3 #-65536,r4,r5
- extzv #16,#16,r4,r0
- bicl3 #-65536,r0,r4
- mull3 r5,r4,-24(fp)
- mull2 r5,r5
- mull2 r4,r4
- bicl3 #32767,-24(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r4
- bicl3 #-65536,-24(fp),r0
- ashl #17,r0,-24(fp)
- addl2 -24(fp),r5
- bicl2 #0,r5
- cmpl r5,-24(fp)
- bgequ noname.593
- incl r4
-noname.593:
- movl r5,r1
- movl r4,r2
- addl2 r1,r8
- bicl2 #0,r8
- cmpl r8,r1
- bgequ noname.594
- incl r2
-noname.594:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.595
- incl r9
-noname.595:
-
- bicl3 #-65536,12(r3),r4
- movzwl 14(r3),r1
- bicl2 #-65536,r1
- bicl3 #-65536,4(r3),r2
- movzwl 6(r3),r0
- bicl2 #-65536,r0
- movl r4,r6
- movl r1,r5
- mull3 r0,r6,-28(fp)
- mull2 r2,r6
- mull3 r2,r5,-32(fp)
- mull2 r0,r5
- addl3 -28(fp),-32(fp),r0
- bicl3 #0,r0,-28(fp)
- cmpl -28(fp),-32(fp)
- bgequ noname.596
- addl2 #65536,r5
-noname.596:
- movzwl -26(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r5
- bicl3 #-65536,-28(fp),r0
- ashl #16,r0,-32(fp)
- addl2 -32(fp),r6
- bicl2 #0,r6
- cmpl r6,-32(fp)
- bgequ noname.597
- incl r5
-noname.597:
- movl r6,r3
- movl r5,r2
- bbc #31,r2,noname.598
- incl r9
-noname.598:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.599
- incl r2
-noname.599:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r8
- bicl2 #0,r8
- cmpl r8,r3
- bgequ noname.600
- incl r2
- bicl3 #0,r2,r0
- bneq noname.600
- incl r9
-noname.600:
- addl2 r2,r10
- bicl2 #0,r10
- cmpl r10,r2
- bgequ noname.601
- incl r9
-noname.601:
-
- movl 4(ap),r0
- movl r8,16(r0)
-
- clrl r8
-
- movl 8(ap),r0
- bicl3 #-65536,12(r0),r3
- movzwl 14(r0),r1
- bicl2 #-65536,r1
- bicl3 #-65536,8(r0),r2
- movzwl 10(r0),r0
- bicl2 #-65536,r0
- movl r3,r5
- movl r1,r4
- mull3 r0,r5,-36(fp)
- mull2 r2,r5
- mull3 r2,r4,-40(fp)
- mull2 r0,r4
- addl3 -36(fp),-40(fp),r0
- bicl3 #0,r0,-36(fp)
- cmpl -36(fp),-40(fp)
- bgequ noname.602
- addl2 #65536,r4
-noname.602:
- movzwl -34(fp),r0
- bicl2 #-65536,r0
- addl2 r0,r4
- bicl3 #-65536,-36(fp),r0
- ashl #16,r0,-40(fp)
- addl2 -40(fp),r5
- bicl2 #0,r5
- cmpl r5,-40(fp)
- bgequ noname.603
- incl r4
-noname.603:
- movl r5,r3
- movl r4,r2
- bbc #31,r2,noname.604
- incl r8
-noname.604:
- addl2 r2,r2
- bicl2 #0,r2
- bbc #31,r3,noname.605
- incl r2
-noname.605:
- addl2 r3,r3
- bicl2 #0,r3
- addl2 r3,r10
- bicl2 #0,r10
- cmpl r10,r3
- bgequ noname.606
- incl r2
- bicl3 #0,r2,r0
- bneq noname.606
- incl r8
-noname.606:
- addl2 r2,r9
- bicl2 #0,r9
- cmpl r9,r2
- bgequ noname.607
- incl r8
-noname.607:
-
- movl 4(ap),r4
- movl r10,20(r4)
-
- clrl r10
-
- movl 8(ap),r0
- movl 12(r0),r3
- bicl3 #-65536,r3,r5
- extzv #16,#16,r3,r0
- bicl3 #-65536,r0,r3
- mull3 r5,r3,-44(fp)
- mull2 r5,r5
- mull2 r3,r3
- bicl3 #32767,-44(fp),r0
- extzv #15,#17,r0,r0
- addl2 r0,r3
- bicl3 #-65536,-44(fp),r0
- ashl #17,r0,-44(fp)
- addl2 -44(fp),r5
- bicl2 #0,r5
- cmpl r5,-44(fp)
- bgequ noname.608
- incl r3
-noname.608:
- movl r5,r1
- movl r3,r2
- addl2 r1,r9
- bicl2 #0,r9
- cmpl r9,r1
- bgequ noname.609
- incl r2
-noname.609:
- addl2 r2,r8
- bicl2 #0,r8
- cmpl r8,r2
- bgequ noname.610
- incl r10
-noname.610:
-
- movl r9,24(r4)
-
- movl r8,28(r4)
-
- ret
-
-; For now, the code below doesn't work, so I end this prematurely.
-.end
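
The BN_MUL_COMBA and BN_SQR_COMBA routines deleted above all repeat one pattern: each 32x32->64-bit product is pieced together from 16-bit halves with mull2/mull3 plus explicit carry tests, and the partial products are folded into a three-register carry chain (r10/r9/r8 in BN_MUL_COMBA4), one output column at a time. A minimal C sketch of that Comba scheme, assuming 32-bit BN_ULONG words -- an illustration of the technique, not OpenSSL's actual bn_asm.c code:

    #include <stdint.h>

    typedef uint32_t BN_ULONG;

    /* One Comba step: (c2:c1:c0) += a * b.  This is what each run of
     * mull2/mull3 half-word products and carry checks above computes. */
    static void mul_add_c(BN_ULONG a, BN_ULONG b,
                          BN_ULONG *c0, BN_ULONG *c1, BN_ULONG *c2)
    {
        uint64_t t = (uint64_t)a * b + *c0;

        *c0 = (BN_ULONG)t;
        t = (t >> 32) + *c1;
        *c1 = (BN_ULONG)t;
        *c2 += (BN_ULONG)(t >> 32);
    }

    /* r[0..7] = a[0..3] * b[0..3], one output column per iteration,
     * as in the deleted BN_MUL_COMBA4. */
    void bn_mul_comba4_ref(BN_ULONG r[8],
                           const BN_ULONG a[4], const BN_ULONG b[4])
    {
        BN_ULONG c0 = 0, c1 = 0, c2 = 0;
        int i, k;

        for (k = 0; k < 7; k++) {
            for (i = 0; i < 4; i++) {
                int j = k - i;
                if (j >= 0 && j < 4)
                    mul_add_c(a[i], b[j], &c0, &c1, &c2);
            }
            r[k] = c0;               /* emit the finished column */
            c0 = c1; c1 = c2; c2 = 0;
        }
        r[7] = c0;
    }

The squaring variants differ only in doubling each cross product a[i]*a[j] with i != j; the bbc #31 branches in the deleted code catch the top bit that the doubling shift would otherwise lose.
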
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
index b579530272..f464368733 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -36,6 +43,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output = pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386");
$sse2=0;
@@ -311,3 +321,5 @@ if ($sse2) {
&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
index 1c4003efc2..a8b402d59b 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -30,6 +37,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output = pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],$0);
$sse2=0;
@@ -84,7 +94,9 @@ $frame=32; # size of above frame rounded up to 16n
&and ("ebp",-64); # align to cache line
- # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # An OS-agnostic version of __chkstk.
+ #
+ # Some OSes (Windows) insist on stack being "wired" to
# physical memory in strictly sequential manner, i.e. if stack
# allocation spans two pages, then reference to farmost one can
# be punishable by SEGV. But page walking can do good even on
@@ -289,7 +301,7 @@ if (0) {
&xor ("eax","eax"); # signal "not fast enough [yet]"
&jmp (&label("just_leave"));
# While the below code provides competitive performance for
- # all key lengthes on modern Intel cores, it's still more
+ # all key lengths on modern Intel cores, it's still more
# than 10% slower for 4096-bit key elsewhere:-( "Competitive"
# means compared to the original integer-only assembler.
# 512-bit RSA sign is better by ~40%, but that's about all
@@ -613,3 +625,5 @@ $sbit=$num;
&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+
+close STDOUT;
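
The reworded comment in the x86-mont.pl hunk above refers to stack probing: some systems only commit stack pages when the guard page is touched in address order, so a large stack allocation has to be walked page by page before it is used. A hypothetical C sketch of such a page walk, assuming 4 KiB pages (probe_pages is an illustrative name, not an OpenSSL function):

    #include <stddef.h>

    #define PAGE_SIZE 4096  /* assumed; real code would query the OS */

    /* Touch one byte per page, in address order, so the OS commits the
     * region one guard page at a time -- the portable counterpart of
     * the __chkstk behaviour the perlasm comment describes. */
    static void probe_pages(volatile unsigned char *base, size_t len)
    {
        size_t off;

        for (off = 0; off < len; off += PAGE_SIZE)
            base[off] = 0;
    }
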
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86.pl b/deps/openssl/openssl/crypto/bn/asm/x86.pl
deleted file mode 100644
index 1bc4f1bb27..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86.pl
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/local/bin/perl
-
-push(@INC,"perlasm","../../perlasm");
-require "x86asm.pl";
-
-require("x86/mul_add.pl");
-require("x86/mul.pl");
-require("x86/sqr.pl");
-require("x86/div.pl");
-require("x86/add.pl");
-require("x86/sub.pl");
-require("x86/comba.pl");
-
-&asm_init($ARGV[0],$0);
-
-&bn_mul_add_words("bn_mul_add_words");
-&bn_mul_words("bn_mul_words");
-&bn_sqr_words("bn_sqr_words");
-&bn_div_words("bn_div_words");
-&bn_add_words("bn_add_words");
-&bn_sub_words("bn_sub_words");
-&bn_mul_comba("bn_mul_comba8",8);
-&bn_mul_comba("bn_mul_comba4",4);
-&bn_sqr_comba("bn_sqr_comba8",8);
-&bn_sqr_comba("bn_sqr_comba4",4);
-
-&asm_finish();
-
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/add.pl b/deps/openssl/openssl/crypto/bn/asm/x86/add.pl
deleted file mode 100644
index 0b5cf583e3..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/add.pl
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_add_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $a="esi";
- $b="edi";
- $c="eax";
- $r="ebx";
- $tmp1="ecx";
- $tmp2="edx";
- $num="ebp";
-
- &mov($r,&wparam(0)); # get r
- &mov($a,&wparam(1)); # get a
- &mov($b,&wparam(2)); # get b
- &mov($num,&wparam(3)); # get num
- &xor($c,$c); # clear carry
- &and($num,0xfffffff8); # num / 8
-
- &jz(&label("aw_finish"));
-
- &set_label("aw_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("Round $i");
-
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
- &add($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &add($tmp1,$tmp2);
- &adc($c,0);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- }
-
- &comment("");
- &add($a,32);
- &add($b,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("aw_loop"));
-
- &set_label("aw_finish",0);
- &mov($num,&wparam(3)); # get num
- &and($num,7);
- &jz(&label("aw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
- &add($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &add($tmp1,$tmp2);
- &adc($c,0);
- &dec($num) if ($i != 6);
-	&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
- &jz(&label("aw_end")) if ($i != 6);
- }
- &set_label("aw_end",0);
-
-# &mov("eax",$c); # $c is "eax"
-
- &function_end($name);
- }
-
-1;
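What the deleted generator unrolled is the textbook add-with-carry chain (add/adc over 32-bit words). A C sketch of the same computation, with BN_ULONG as an assumed stand-in typedef and a 64-bit temporary playing the role of the carry flag:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

static BN_ULONG add_words(BN_ULONG *r, const BN_ULONG *a,
                          const BN_ULONG *b, int num)
{
    BN_ULONG c = 0;                        /* carry, 0 or 1 */
    for (int i = 0; i < num; i++) {
        unsigned long long t = (unsigned long long)a[i] + b[i] + c;
        r[i] = (BN_ULONG)t;                /* low word */
        c = (BN_ULONG)(t >> 32);           /* carry out */
    }
    return c;
}

The aw_loop/aw_finish split above is just this loop unrolled eight times plus a tail for num % 8.
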
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/comba.pl b/deps/openssl/openssl/crypto/bn/asm/x86/comba.pl
deleted file mode 100644
index 2291253629..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/comba.pl
+++ /dev/null
@@ -1,277 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub mul_add_c
- {
- local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
-
-	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
-	# words, and 1 to load the return value
-
- &comment("mul a[$ai]*b[$bi]");
-
- # "eax" and "edx" will always be pre-loaded.
- # &mov("eax",&DWP($ai*4,$a,"",0)) ;
- # &mov("edx",&DWP($bi*4,$b,"",0));
-
- &mul("edx");
- &add($c0,"eax");
-	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;	# load next a
- &mov("eax",&wparam(0)) if $pos > 0; # load r[]
- ###
- &adc($c1,"edx");
-	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;	# load next b
-	&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;	# load next b
- ###
- &adc($c2,0);
-	# if pos > 1, it means it is the last loop
-	&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;	# save r[];
-	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;	# load next a
- }
-
-sub sqr_add_c
- {
- local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
-
-	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
-	# words, and 1 to load the return value
-
- &comment("sqr a[$ai]*a[$bi]");
-
- # "eax" and "edx" will always be pre-loaded.
- # &mov("eax",&DWP($ai*4,$a,"",0)) ;
- # &mov("edx",&DWP($bi*4,$b,"",0));
-
- if ($ai == $bi)
- { &mul("eax");}
- else
- { &mul("edx");}
- &add($c0,"eax");
- &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
- ###
- &adc($c1,"edx");
- &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
- ###
- &adc($c2,0);
-	# if pos > 1, it means it is the last loop
- &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
- &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
- }
-
-sub sqr_add_c2
- {
- local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
-
-	# pos == -1 if eax and edx are pre-loaded, 0 to load from next
-	# words, and 1 to load the return value
-
- &comment("sqr a[$ai]*a[$bi]");
-
- # "eax" and "edx" will always be pre-loaded.
- # &mov("eax",&DWP($ai*4,$a,"",0)) ;
- # &mov("edx",&DWP($bi*4,$a,"",0));
-
- if ($ai == $bi)
- { &mul("eax");}
- else
- { &mul("edx");}
- &add("eax","eax");
- ###
- &adc("edx","edx");
- ###
- &adc($c2,0);
- &add($c0,"eax");
- &adc($c1,"edx");
- &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
- &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
- &adc($c2,0);
- &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
- &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
- ###
- }
-
-sub bn_mul_comba
- {
- local($name,$num)=@_;
- local($a,$b,$c0,$c1,$c2);
- local($i,$as,$ae,$bs,$be,$ai,$bi);
- local($tot,$end);
-
- &function_begin_B($name,"");
-
- $c0="ebx";
- $c1="ecx";
- $c2="ebp";
- $a="esi";
- $b="edi";
-
- $as=0;
- $ae=0;
- $bs=0;
- $be=0;
- $tot=$num+$num-1;
-
- &push("esi");
- &mov($a,&wparam(1));
- &push("edi");
- &mov($b,&wparam(2));
- &push("ebp");
- &push("ebx");
-
- &xor($c0,$c0);
- &mov("eax",&DWP(0,$a,"",0)); # load the first word
- &xor($c1,$c1);
-	&mov("edx",&DWP(0,$b,"",0));	# load the first word of b
-
- for ($i=0; $i<$tot; $i++)
- {
- $ai=$as;
- $bi=$bs;
- $end=$be+1;
-
- &comment("################## Calculate word $i");
-
- for ($j=$bs; $j<$end; $j++)
- {
- &xor($c2,$c2) if ($j == $bs);
- if (($j+1) == $end)
- {
- $v=1;
- $v=2 if (($i+1) == $tot);
- }
- else
- { $v=0; }
- if (($j+1) != $end)
- {
- $na=($ai-1);
- $nb=($bi+1);
- }
- else
- {
- $na=$as+($i < ($num-1));
- $nb=$bs+($i >= ($num-1));
- }
-#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
- &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
- if ($v)
- {
- &comment("saved r[$i]");
- # &mov("eax",&wparam(0));
- # &mov(&DWP($i*4,"eax","",0),$c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- }
- $ai--;
- $bi++;
- }
- $as++ if ($i < ($num-1));
- $ae++ if ($i >= ($num-1));
-
- $bs++ if ($i >= ($num-1));
- $be++ if ($i < ($num-1));
- }
- &comment("save r[$i]");
- # &mov("eax",&wparam(0));
- &mov(&DWP($i*4,"eax","",0),$c0);
-
- &pop("ebx");
- &pop("ebp");
- &pop("edi");
- &pop("esi");
- &ret();
- &function_end_B($name);
- }
-
-sub bn_sqr_comba
- {
- local($name,$num)=@_;
- local($r,$a,$c0,$c1,$c2)=@_;
- local($i,$as,$ae,$bs,$be,$ai,$bi);
- local($b,$tot,$end,$half);
-
- &function_begin_B($name,"");
-
- $c0="ebx";
- $c1="ecx";
- $c2="ebp";
- $a="esi";
- $r="edi";
-
- &push("esi");
- &push("edi");
- &push("ebp");
- &push("ebx");
- &mov($r,&wparam(0));
- &mov($a,&wparam(1));
- &xor($c0,$c0);
- &xor($c1,$c1);
- &mov("eax",&DWP(0,$a,"",0)); # load the first word
-
- $as=0;
- $ae=0;
- $bs=0;
- $be=0;
- $tot=$num+$num-1;
-
- for ($i=0; $i<$tot; $i++)
- {
- $ai=$as;
- $bi=$bs;
- $end=$be+1;
-
- &comment("############### Calculate word $i");
- for ($j=$bs; $j<$end; $j++)
- {
- &xor($c2,$c2) if ($j == $bs);
- if (($ai-1) < ($bi+1))
- {
- $v=1;
- $v=2 if ($i+1) == $tot;
- }
- else
- { $v=0; }
- if (!$v)
- {
- $na=$ai-1;
- $nb=$bi+1;
- }
- else
- {
- $na=$as+($i < ($num-1));
- $nb=$bs+($i >= ($num-1));
- }
- if ($ai == $bi)
- {
- &sqr_add_c($r,$a,$ai,$bi,
- $c0,$c1,$c2,$v,$i,$na,$nb);
- }
- else
- {
- &sqr_add_c2($r,$a,$ai,$bi,
- $c0,$c1,$c2,$v,$i,$na,$nb);
- }
- if ($v)
- {
- &comment("saved r[$i]");
- #&mov(&DWP($i*4,$r,"",0),$c0);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- last;
- }
- $ai--;
- $bi++;
- }
- $as++ if ($i < ($num-1));
- $ae++ if ($i >= ($num-1));
-
- $bs++ if ($i >= ($num-1));
- $be++ if ($i < ($num-1));
- }
- &mov(&DWP($i*4,$r,"",0),$c0);
- &pop("ebx");
- &pop("ebp");
- &pop("edi");
- &pop("esi");
- &ret();
- &function_end_B($name);
- }
-
-1;
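bn_mul_comba and bn_sqr_comba are column-oriented (Comba) multipliers: instead of row-by-row multiply-and-add passes, they walk the result one word at a time, summing every partial product that lands in the current column into the three-word accumulator (c0,c1,c2 above) and storing each result word exactly once. A C sketch of the general pattern, again with BN_ULONG as an assumed 32-bit stand-in; the deleted code is this loop fully unrolled for n = 4 and n = 8:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

/* Column-wise (Comba) multiply: r[] receives 2n words.  For each
 * result word k, every product a[i]*b[k-i] is folded into (c0,c1,c2)
 * before r[k] is written. */
static void mul_comba(BN_ULONG *r, const BN_ULONG *a,
                      const BN_ULONG *b, int n)
{
    unsigned long long c0 = 0, c1 = 0, c2 = 0;

    for (int k = 0; k < 2 * n - 1; k++) {
        for (int i = (k < n ? 0 : k - n + 1); i <= k && i < n; i++) {
            unsigned long long t = (unsigned long long)a[i] * b[k - i];
            c0 += (BN_ULONG)t;             /* low product word */
            c1 += (t >> 32) + (c0 >> 32);  /* high word plus carry */
            c0 = (BN_ULONG)c0;
            c2 += c1 >> 32;                /* column overflow */
            c1 = (BN_ULONG)c1;
        }
        r[k] = (BN_ULONG)c0;               /* emit column k */
        c0 = c1; c1 = c2; c2 = 0;          /* rotate the accumulator */
    }
    r[2 * n - 1] = (BN_ULONG)c0;
}

The sqr variants halve the work by computing each cross product a[i]*a[j], i != j, once and doubling it, which is what sqr_add_c2's add eax,eax / adc edx,edx pair does.
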
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/div.pl b/deps/openssl/openssl/crypto/bn/asm/x86/div.pl
deleted file mode 100644
index 0e90152caa..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/div.pl
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_div_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
- &mov("edx",&wparam(0)); #
- &mov("eax",&wparam(1)); #
- &mov("ebx",&wparam(2)); #
- &div("ebx");
- &function_end($name);
- }
-1;
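The deleted bn_div_words is a three-instruction wrapper around the x86 DIV instruction, which divides the double word edx:eax by a 32-bit divisor. A C model of the computation; like DIV itself it assumes h < d, otherwise the quotient does not fit in one word (hardware DIV raises #DE in that case):

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

/* Divide the double word (h:l) by d; caller ensures h < d. */
static BN_ULONG div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    unsigned long long t = ((unsigned long long)h << 32) | l;
    return (BN_ULONG)(t / d);              /* the quotient, as DIV leaves in eax */
}
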
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/f b/deps/openssl/openssl/crypto/bn/asm/x86/f
deleted file mode 100644
index 22e4112224..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/f
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/mul.pl b/deps/openssl/openssl/crypto/bn/asm/x86/mul.pl
deleted file mode 100644
index 674cb9b055..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/mul.pl
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_mul_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $Low="eax";
- $High="edx";
- $a="ebx";
- $w="ecx";
- $r="edi";
- $c="esi";
- $num="ebp";
-
- &xor($c,$c); # clear carry
- &mov($r,&wparam(0)); #
- &mov($a,&wparam(1)); #
- &mov($num,&wparam(2)); #
- &mov($w,&wparam(3)); #
-
- &and($num,0xfffffff8); # num / 8
- &jz(&label("mw_finish"));
-
- &set_label("mw_loop",0);
- for ($i=0; $i<32; $i+=4)
- {
- &comment("Round $i");
-
- &mov("eax",&DWP($i,$a,"",0)); # *a
- &mul($w); # *a * w
- &add("eax",$c); # L(t)+=c
- # XXX
-
- &adc("edx",0); # H(t)+=carry
- &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
-
- &mov($c,"edx"); # c= H(t);
- }
-
- &comment("");
- &add($a,32);
- &add($r,32);
- &sub($num,8);
- &jz(&label("mw_finish"));
- &jmp(&label("mw_loop"));
-
- &set_label("mw_finish",0);
- &mov($num,&wparam(2)); # get num
- &and($num,7);
- &jnz(&label("mw_finish2"));
- &jmp(&label("mw_end"));
-
- &set_label("mw_finish2",1);
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0));# *a
- &mul($w); # *a * w
- &add("eax",$c); # L(t)+=c
- # XXX
- &adc("edx",0); # H(t)+=carry
- &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
- &mov($c,"edx"); # c= H(t);
- &dec($num) if ($i != 7-1);
- &jz(&label("mw_end")) if ($i != 7-1);
- }
- &set_label("mw_end",0);
- &mov("eax",$c);
-
- &function_end($name);
- }
-
-1;
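bn_mul_words multiplies an array by a single word w, propagating the high half of each product as the carry into the next round, exactly the L(t)/H(t) bookkeeping in the comments above. A C sketch, same stand-in typedef as before:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

static BN_ULONG mul_words(BN_ULONG *r, const BN_ULONG *a,
                          int num, BN_ULONG w)
{
    BN_ULONG c = 0;
    for (int i = 0; i < num; i++) {
        unsigned long long t = (unsigned long long)a[i] * w + c;
        r[i] = (BN_ULONG)t;                /* *r = L(t) */
        c = (BN_ULONG)(t >> 32);           /* c  = H(t) */
    }
    return c;
}
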
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/mul_add.pl b/deps/openssl/openssl/crypto/bn/asm/x86/mul_add.pl
deleted file mode 100644
index 61830d3a90..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/mul_add.pl
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_mul_add_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $Low="eax";
- $High="edx";
- $a="ebx";
- $w="ebp";
- $r="edi";
- $c="esi";
-
- &xor($c,$c); # clear carry
- &mov($r,&wparam(0)); #
-
- &mov("ecx",&wparam(2)); #
- &mov($a,&wparam(1)); #
-
- &and("ecx",0xfffffff8); # num / 8
- &mov($w,&wparam(3)); #
-
-	&push("ecx");			# make room on the stack for a tmp variable
-
- &jz(&label("maw_finish"));
-
- &set_label("maw_loop",0);
-
- &mov(&swtmp(0),"ecx"); #
-
- for ($i=0; $i<32; $i+=4)
- {
- &comment("Round $i");
-
- &mov("eax",&DWP($i,$a,"",0)); # *a
- &mul($w); # *a * w
-	&add("eax",$c);		# L(t)+=c
-	&mov($c,&DWP($i,$r,"",0));	# c= *r
-	&adc("edx",0);			# H(t)+=carry
-	&add("eax",$c);		# L(t)+= *r
- &adc("edx",0); # H(t)+=carry
- &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
- }
-
- &comment("");
- &mov("ecx",&swtmp(0)); #
- &add($a,32);
- &add($r,32);
- &sub("ecx",8);
- &jnz(&label("maw_loop"));
-
- &set_label("maw_finish",0);
- &mov("ecx",&wparam(2)); # get num
- &and("ecx",7);
- &jnz(&label("maw_finish2")); # helps branch prediction
- &jmp(&label("maw_end"));
-
- &set_label("maw_finish2",1);
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0));# *a
- &mul($w); # *a * w
- &add("eax",$c); # L(t)+=c
- &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
- &adc("edx",0); # H(t)+=carry
- &add("eax",$c);
- &adc("edx",0); # H(t)+=carry
- &dec("ecx") if ($i != 7-1);
- &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
- &jz(&label("maw_end")) if ($i != 7-1);
- }
- &set_label("maw_end",0);
- &mov("eax",$c);
-
-	&pop("ecx");			# remove the tmp variable from the stack
-
- &function_end($name);
- }
-
-1;
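bn_mul_add_words is the multiply-accumulate variant: each round folds the existing r[i] into the product as well, which is why the rounds above need two add/adc pairs. A C sketch:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

static BN_ULONG mul_add_words(BN_ULONG *r, const BN_ULONG *a,
                              int num, BN_ULONG w)
{
    BN_ULONG c = 0;
    for (int i = 0; i < num; i++) {
        /* a[i]*w + r[i] + c never overflows 64 bits */
        unsigned long long t = (unsigned long long)a[i] * w + r[i] + c;
        r[i] = (BN_ULONG)t;                /* *r = L(t) */
        c = (BN_ULONG)(t >> 32);           /* c  = H(t) */
    }
    return c;
}
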
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/sqr.pl b/deps/openssl/openssl/crypto/bn/asm/x86/sqr.pl
deleted file mode 100644
index 1f90993cf6..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/sqr.pl
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_sqr_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $r="esi";
- $a="edi";
- $num="ebx";
-
- &mov($r,&wparam(0)); #
- &mov($a,&wparam(1)); #
- &mov($num,&wparam(2)); #
-
- &and($num,0xfffffff8); # num / 8
- &jz(&label("sw_finish"));
-
- &set_label("sw_loop",0);
- for ($i=0; $i<32; $i+=4)
- {
- &comment("Round $i");
- &mov("eax",&DWP($i,$a,"",0)); # *a
- # XXX
- &mul("eax"); # *a * *a
- &mov(&DWP($i*2,$r,"",0),"eax"); #
- &mov(&DWP($i*2+4,$r,"",0),"edx");#
- }
-
- &comment("");
- &add($a,32);
- &add($r,64);
- &sub($num,8);
- &jnz(&label("sw_loop"));
-
- &set_label("sw_finish",0);
- &mov($num,&wparam(2)); # get num
- &and($num,7);
- &jz(&label("sw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0)); # *a
- # XXX
- &mul("eax"); # *a * *a
- &mov(&DWP($i*8,$r,"",0),"eax"); #
- &dec($num) if ($i != 7-1);
- &mov(&DWP($i*8+4,$r,"",0),"edx");
- &jz(&label("sw_end")) if ($i != 7-1);
- }
- &set_label("sw_end",0);
-
- &function_end($name);
- }
-
-1;
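bn_sqr_words has no carry chain at all: each input word is squared independently into a double-word slot of the result, which is why the unrolled rounds are so short. Sketch:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

static void sqr_words(BN_ULONG *r, const BN_ULONG *a, int num)
{
    for (int i = 0; i < num; i++) {
        unsigned long long t = (unsigned long long)a[i] * a[i];
        r[2 * i]     = (BN_ULONG)t;        /* low word  (eax) */
        r[2 * i + 1] = (BN_ULONG)(t >> 32);/* high word (edx) */
    }
}
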
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86/sub.pl b/deps/openssl/openssl/crypto/bn/asm/x86/sub.pl
deleted file mode 100644
index 837b0e1b07..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/x86/sub.pl
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/local/bin/perl
-# x86 assembler
-
-sub bn_sub_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $a="esi";
- $b="edi";
- $c="eax";
- $r="ebx";
- $tmp1="ecx";
- $tmp2="edx";
- $num="ebp";
-
- &mov($r,&wparam(0)); # get r
- &mov($a,&wparam(1)); # get a
- &mov($b,&wparam(2)); # get b
- &mov($num,&wparam(3)); # get num
- &xor($c,$c); # clear carry
- &and($num,0xfffffff8); # num / 8
-
- &jz(&label("aw_finish"));
-
- &set_label("aw_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("Round $i");
-
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- }
-
- &comment("");
- &add($a,32);
- &add($b,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("aw_loop"));
-
- &set_label("aw_finish",0);
- &mov($num,&wparam(3)); # get num
- &and($num,7);
- &jz(&label("aw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov($tmp2,&DWP($i*4,$b,"",0));# *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &dec($num) if ($i != 6);
-	&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
- &jz(&label("aw_end")) if ($i != 6);
- }
- &set_label("aw_end",0);
-
-# &mov("eax",$c); # $c is "eax"
-
- &function_end($name);
- }
-
-1;
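bn_sub_words is bn_add_words with the carry chain turned into a borrow chain; the perlasm reuses the same adc trick to capture the borrow flag into $c. Sketch:

typedef unsigned int BN_ULONG;             /* assumed 32-bit word */

static BN_ULONG sub_words(BN_ULONG *r, const BN_ULONG *a,
                          const BN_ULONG *b, int num)
{
    BN_ULONG borrow = 0;                   /* 0 or 1 */
    for (int i = 0; i < num; i++) {
        unsigned long long t =
            (unsigned long long)a[i] - b[i] - borrow;
        r[i] = (BN_ULONG)t;
        borrow = (BN_ULONG)((t >> 32) & 1);/* 1 if the subtract wrapped */
    }
    return borrow;
}
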
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
index 1729b479d4..0ff3805a61 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
@@ -1,3 +1,12 @@
+/*
+ * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
#include "../bn_lcl.h"
#if !(defined(__GNUC__) && __GNUC__>=2)
# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
@@ -216,9 +225,10 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
- " loop 1b \n"
- " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
- "+r"(i)
+ " dec %1 \n"
+ " jnz 1b \n"
+ " sbbq %0,%0 \n"
+ :"=&r" (ret), "+c"(n), "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
@@ -242,9 +252,10 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
- " loop 1b \n"
- " sbbq %0,%0 \n":"=&r" (ret), "+c"(n),
- "+r"(i)
+ " dec %1 \n"
+ " jnz 1b \n"
+ " sbbq %0,%0 \n"
+ :"=&r" (ret), "+c"(n), "+r"(i)
:"r"(rp), "r"(ap), "r"(bp)
:"cc", "memory");
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
index 42bbec2fb7..d962f62033 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -31,7 +38,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
($lo,$hi)=("%rax","%rdx"); $a=$lo;
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
index 80492d8e63..df4cca5bfe 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -50,7 +57,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
@@ -129,7 +136,9 @@ $code.=<<___;
neg $num # restore $num
and \$-1024,%r10 # minimize TLB usage
- # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # An OS-agnostic version of __chkstk.
+ #
+ # Some OSes (Windows) insist on stack being "wired" to
# physical memory in strictly sequential manner, i.e. if stack
# allocation spans two pages, then reference to farmost one can
# be punishable by SEGV. But page walking can do good even on
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
index 42178e455a..5779059ea2 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -35,7 +42,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
@@ -113,7 +120,9 @@ $code.=<<___;
neg $num # restore $num
and \$-1024,%r10 # minimize TLB usage
- # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # An OS-agnostic version of __chkstk.
+ #
+ # Some OSes (Windows) insist on stack being "wired" to
# physical memory in strictly sequential manner, i.e. if stack
# allocation spans two pages, then reference to farmost one can
# be punishable by SEGV. But page walking can do good even on