Diffstat (limited to 'deps/openssl/openssl/crypto/bn')
-rw-r--r--  deps/openssl/openssl/crypto/bn/README.pod | 8
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/bn-586.pl | 26
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/co-586.pl | 14
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ia64.S | 25
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips-mont.pl | 102
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/mips.pl | 752
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/pa-risc2.s | 1624
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s | 1612
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl | 16
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl | 1697
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc.pl | 275
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl | 4
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl | 145
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl | 198
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl | 4
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl | 2
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv8.S | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S | 16
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl | 4
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl | 4
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/via-mont.pl | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl | 4
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86-mont.pl | 8
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c | 14
-rw-r--r--  deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl | 37
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl | 80
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl | 129
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_add.c | 132
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_asm.c | 32
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_blind.c | 11
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_ctx.c | 16
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_dh.c | 296
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_div.c | 28
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_err.c | 167
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_exp.c | 186
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_exp2.c | 8
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_gcd.c | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_gf2m.c | 160
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_intern.c | 15
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_lcl.h | 117
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_lib.c | 195
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_mod.c | 2
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_mont.c | 44
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_mul.c | 341
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_nist.c | 4
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_prime.c | 173
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_prime.h | 515
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_prime.pl | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_print.c | 106
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_rand.c | 62
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_recp.c | 37
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_shift.c | 22
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_sqr.c | 4
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_sqrt.c | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_srp.c | 6
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_word.c | 18
-rw-r--r--  deps/openssl/openssl/crypto/bn/bn_x931p.c | 12
-rw-r--r--  deps/openssl/openssl/crypto/bn/build.info | 35
-rw-r--r--  deps/openssl/openssl/crypto/bn/rsaz_exp.c | 47
-rw-r--r--  deps/openssl/openssl/crypto/bn/rsaz_exp.h | 51
67 files changed, 3970 insertions, 5742 deletions
diff --git a/deps/openssl/openssl/crypto/bn/README.pod b/deps/openssl/openssl/crypto/bn/README.pod
index 109ab0d914..706a140342 100644
--- a/deps/openssl/openssl/crypto/bn/README.pod
+++ b/deps/openssl/openssl/crypto/bn/README.pod
@@ -6,7 +6,7 @@ bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words,
bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8,
bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal,
bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive,
-bn_mul_low_recursive, bn_mul_high, bn_sqr_normal, bn_sqr_recursive,
+bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive,
bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top,
bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM
library internal functions
@@ -41,8 +41,6 @@ library internal functions
int n, int tna, int tnb, BN_ULONG *tmp);
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
int n2, BN_ULONG *tmp);
- void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l,
- int n2, BN_ULONG *tmp);
void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp);
@@ -178,10 +176,6 @@ bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the
B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a>
and B<b>.
-bn_mul_high(B<r>, B<a>, B<b>, B<l>, B<n2>, B<tmp>) operates on the
-B<n2> word arrays B<r>, B<a>, B<b> and B<l> (?) and the 3*B<n2> word
-array B<tmp>.
-
BN_mul() calls bn_mul_normal(), or an optimized implementation if the
factors have the same size: bn_mul_comba8() is used if they are 8
words long, bn_mul_recursive() if they are larger than
diff --git a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
index 9632133090..c9b962a150 100644
--- a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
@@ -8,7 +8,7 @@
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
index 0bb5433075..7a0cdb2e8a 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
@@ -36,7 +36,7 @@
#
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
-#
+#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
$flavour = shift;
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
index ddee8b7fa1..6bedc62ba6 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
@@ -23,7 +23,7 @@
# [depending on key length, less for longer keys] on ARM920T, and
# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
# base and compiler generated code with in-lined umull and even umlal
-# instructions. The latter means that this code didn't really have an
+# instructions. The latter means that this code didn't really have an
# "advantage" of utilizing some "secret" instruction.
#
# The code is interoperable with Thumb ISA and is rather compact, less
diff --git a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
index 1ca1bbf7d4..58effc8808 100644
--- a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
@@ -14,7 +14,7 @@ require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
-&asm_init($ARGV[0],$0);
+&asm_init($ARGV[0]);
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -54,7 +54,7 @@ sub bn_mul_add_words
&movd("mm0",&wparam(3)); # mm0 = w
&pxor("mm1","mm1"); # mm1 = carry_in
&jmp(&label("maw_sse2_entry"));
-
+
&set_label("maw_sse2_unrolled",16);
&movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
&paddq("mm1","mm3"); # mm1 = carry_in + r[0]
@@ -675,20 +675,20 @@ sub bn_sub_part_words
&adc($c,0);
&mov(&DWP($i*4,$r,"",0),$tmp1); # *r
}
-
+
&comment("");
&add($b,32);
&add($r,32);
&sub($num,8);
&jnz(&label("pw_neg_loop"));
-
+
&set_label("pw_neg_finish",0);
&mov($tmp2,&wparam(4)); # get dl
&mov($num,0);
&sub($num,$tmp2);
&and($num,7);
&jz(&label("pw_end"));
-
+
for ($i=0; $i<7; $i++)
{
&comment("dl<0 Tail Round $i");
@@ -705,9 +705,9 @@ sub bn_sub_part_words
}
&jmp(&label("pw_end"));
-
+
&set_label("pw_pos",0);
-
+
&and($num,0xfffffff8); # num / 8
&jz(&label("pw_pos_finish"));
@@ -722,18 +722,18 @@ sub bn_sub_part_words
&mov(&DWP($i*4,$r,"",0),$tmp1); # *r
&jnc(&label("pw_nc".$i));
}
-
+
&comment("");
&add($a,32);
&add($r,32);
&sub($num,8);
&jnz(&label("pw_pos_loop"));
-
+
&set_label("pw_pos_finish",0);
&mov($num,&wparam(4)); # get dl
&and($num,7);
&jz(&label("pw_end"));
-
+
for ($i=0; $i<7; $i++)
{
&comment("dl>0 Tail Round $i");
@@ -754,17 +754,17 @@ sub bn_sub_part_words
&mov(&DWP($i*4,$r,"",0),$tmp1); # *r
&set_label("pw_nc".$i,0);
}
-
+
&comment("");
&add($a,32);
&add($r,32);
&sub($num,8);
&jnz(&label("pw_nc_loop"));
-
+
&mov($num,&wparam(4)); # get dl
&and($num,7);
&jz(&label("pw_nc_end"));
-
+
for ($i=0; $i<7; $i++)
{
&mov($tmp1,&DWP($i*4,$a,"",0)); # *a
diff --git a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
index c0e5400807..9c46da3af8 100644
--- a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
@@ -43,7 +43,7 @@ $code.=<<___;
SHRU $A,16, $Ahi ; smash $A to two halfwords
|| EXTU $A,16,16,$Alo
- XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits muliplication
+ XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication
|| XORMPY $Ahi,$B_2,$Ahix2
|| EXTU $B,16,24,$B_1
XORMPY $Alo,$B_0,$Alox0
diff --git a/deps/openssl/openssl/crypto/bn/asm/co-586.pl b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
index 60d0363660..97f5e3a19f 100644
--- a/deps/openssl/openssl/crypto/bn/asm/co-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
@@ -13,7 +13,7 @@ require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
-&asm_init($ARGV[0],$0);
+&asm_init($ARGV[0]);
&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
@@ -47,7 +47,7 @@ sub mul_add_c
&mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
###
&adc($c2,0);
- # is pos > 1, it means it is the last loop
+ # is pos > 1, it means it is the last loop
&mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
}
@@ -76,7 +76,7 @@ sub sqr_add_c
&mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
###
&adc($c2,0);
- # is pos > 1, it means it is the last loop
+ # is pos > 1, it means it is the last loop
&mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
}
@@ -127,7 +127,7 @@ sub bn_mul_comba
$c2="ebp";
$a="esi";
$b="edi";
-
+
$as=0;
$ae=0;
$bs=0;
@@ -142,9 +142,9 @@ sub bn_mul_comba
&push("ebx");
&xor($c0,$c0);
- &mov("eax",&DWP(0,$a,"",0)); # load the first word
+ &mov("eax",&DWP(0,$a,"",0)); # load the first word
&xor($c1,$c1);
- &mov("edx",&DWP(0,$b,"",0)); # load the first second
+ &mov("edx",&DWP(0,$b,"",0)); # load the first second
for ($i=0; $i<$tot; $i++)
{
@@ -152,7 +152,7 @@ sub bn_mul_comba
$bi=$bs;
$end=$be+1;
- &comment("################## Calculate word $i");
+ &comment("################## Calculate word $i");
for ($j=$bs; $j<$end; $j++)
{
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
index 0df1fad115..ec486f7779 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
@@ -8,7 +8,7 @@
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -80,7 +80,7 @@ $code=<<___;
// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
// const BN_ULONG *bp,const BN_ULONG *np,
-// const BN_ULONG *n0p,int num);
+// const BN_ULONG *n0p,int num);
.align 64
.global bn_mul_mont#
.proc bn_mul_mont#
@@ -203,7 +203,7 @@ bn_mul_mont_general:
{ .mmi; .pred.rel "mutex",p39,p41
(p39) add topbit=r0,r0
(p41) add topbit=r0,r0,1
- nop.i 0 }
+ nop.i 0 }
{ .mmi; st8 [tp_1]=n[0]
add tptr=16,sp
add tp_1=8,sp };;
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64.S b/deps/openssl/openssl/crypto/bn/asm/ia64.S
index f2404a3c1e..d235c45e2d 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64.S
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64.S
@@ -1,9 +1,9 @@
.explicit
.text
.ident "ia64.S, Version 2.1"
-.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>"
-// Copyright 2001-2016 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
@@ -12,7 +12,7 @@
//
// ====================================================================
-// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project.
//
// Rights for redistribution and usage in source and binary forms are
@@ -20,7 +20,7 @@
// disclaimed.
// ====================================================================
//
-// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is
+// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is
// different from Itanium to this module viewpoint. Most notably, is it
// "wider" than Itanium? Can you experience loop scalability as
// discussed in commentary sections? Not really:-( Itanium2 has 6
@@ -141,7 +141,7 @@
// User Mask I want to excuse the kernel from preserving upper
// (f32-f128) FP register bank over process context switch, thus
// minimizing bus bandwidth consumption during the switch (i.e.
-// after PKI opration completes and the program is off doing
+// after PKI operation completes and the program is off doing
// something else like bulk symmetric encryption). Having said
// this, I also want to point out that it might be good idea
// to compile the whole toolkit (as well as majority of the
@@ -157,12 +157,15 @@
#else
#define ADDP add
#endif
+#ifdef __VMS
+.alias abort, "decc$abort"
+#endif
#if 1
//
// bn_[add|sub]_words routines.
//
-// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the
+// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
// data reside in L1 cache, i.e. 2 ticks away). It's possible to
// compress the epilogue and get down to 2*n+6, but at the cost of
// scalability (the neat feature of this implementation is that it
@@ -500,7 +503,7 @@ bn_sqr_words:
// possible to compress the epilogue (I'm getting tired to write this
// comment over and over) and get down to 2*n+16 at the cost of
// scalability. The decision will very likely be reconsidered after the
-// benchmark program is profiled. I.e. if perfomance gain on Itanium
+// benchmark program is profiled. I.e. if performance gain on Itanium
// will appear larger than loss on "wider" IA-64, then the loop should
// be explicitly split and the epilogue compressed.
.L_bn_sqr_words_ctop:
@@ -936,7 +939,7 @@ bn_mul_comba8:
xma.hu f118=f39,f127,f117 }
{ .mfi; xma.lu f117=f39,f127,f117 };;//
//-------------------------------------------------//
-// Leaving muliplier's heaven... Quite a ride, huh?
+// Leaving multiplier's heaven... Quite a ride, huh?
{ .mii; getf.sig r31=f47
add r25=r25,r24
@@ -1428,6 +1431,7 @@ bn_div_words:
mov ar.ec=0 // don't rotate at exit
mov pr.rot=0 }
{ .mii; mov L=r33 // save l
+ mov r25=r0 // needed if abort is called on VMS
mov r36=r0 };;
.L_divw_shift: // -vv- note signed comparison
@@ -1529,9 +1533,8 @@ bn_div_words:
// output: f8 = (int)(a/b)
// clobbered: f8,f9,f10,f11,pred
pred=p15
-// One can argue that this snippet is copyrighted to Intel
-// Corporation, as it's essentially identical to one of those
-// found in "Divide, Square Root and Remainder" section at
+// This snippet is based on text found in the "Divide, Square
+// Root and Remainder" section at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// Yes, I admit that the referred code was used as template,
// but after I realized that there hardly is any other instruction
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
index e141e1a925..fbe5d04f71 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
@@ -56,14 +56,14 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
- $PTR_ADD="dadd"; # incidentally works even on n32
- $PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_ADD="daddu"; # incidentally works even on n32
+ $PTR_SUB="dsubu"; # incidentally works even on n32
$REG_S="sd";
$REG_L="ld";
$SZREG=8;
} else {
- $PTR_ADD="add";
- $PTR_SUB="sub";
+ $PTR_ADD="addu";
+ $PTR_SUB="subu";
$REG_S="sw";
$REG_L="lw";
$SZREG=4;
@@ -121,6 +121,8 @@ $m1=$s11;
$FRAMESIZE=14;
$code=<<___;
+#include "mips_arch.h"
+
.text
.set noat
@@ -183,27 +185,27 @@ $code.=<<___;
$PTR_SUB $sp,$num
and $sp,$at
- $MULTU $aj,$bi
- $LD $alo,$BNSZ($ap)
- $LD $nlo,$BNSZ($np)
- mflo $lo0
- mfhi $hi0
- $MULTU $lo0,$n0
- mflo $m1
-
- $MULTU $alo,$bi
- mflo $alo
- mfhi $ahi
-
- $MULTU $nj,$m1
- mflo $lo1
- mfhi $hi1
- $MULTU $nlo,$m1
+ $MULTU ($aj,$bi)
+ $LD $ahi,$BNSZ($ap)
+ $LD $nhi,$BNSZ($np)
+ mflo ($lo0,$aj,$bi)
+ mfhi ($hi0,$aj,$bi)
+ $MULTU ($lo0,$n0)
+ mflo ($m1,$lo0,$n0)
+
+ $MULTU ($ahi,$bi)
+ mflo ($alo,$ahi,$bi)
+ mfhi ($ahi,$ahi,$bi)
+
+ $MULTU ($nj,$m1)
+ mflo ($lo1,$nj,$m1)
+ mfhi ($hi1,$nj,$m1)
+ $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nhi,$m1)
+ mfhi ($nhi,$nhi,$m1)
move $tp,$sp
li $j,2*$BNSZ
@@ -215,25 +217,25 @@ $code.=<<___;
$LD $aj,($aj)
$LD $nj,($nj)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
- mflo $alo
- mfhi $ahi
+ mflo ($alo,$aj,$bi)
+ mfhi ($ahi,$aj,$bi)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
- $MULTU $nj,$m1
+ $MULTU ($nj,$m1)
$ADDU $hi1,$at
addu $j,$BNSZ
$ST $lo1,($tp)
sltu $t0,$j,$num
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nj,$m1)
+ mfhi ($nhi,$nj,$m1)
bnez $t0,.L1st
$PTR_ADD $tp,$BNSZ
@@ -263,34 +265,34 @@ $code.=<<___;
$PTR_ADD $bi,$bp,$i
$LD $bi,($bi)
$LD $aj,($ap)
- $LD $alo,$BNSZ($ap)
+ $LD $ahi,$BNSZ($ap)
$LD $tj,($sp)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$LD $nj,($np)
- $LD $nlo,$BNSZ($np)
- mflo $lo0
- mfhi $hi0
+ $LD $nhi,$BNSZ($np)
+ mflo ($lo0,$aj,$bi)
+ mfhi ($hi0,$aj,$bi)
$ADDU $lo0,$tj
- $MULTU $lo0,$n0
+ $MULTU ($lo0,$n0)
sltu $at,$lo0,$tj
$ADDU $hi0,$at
- mflo $m1
+ mflo ($m1,$lo0,$n0)
- $MULTU $alo,$bi
- mflo $alo
- mfhi $ahi
+ $MULTU ($ahi,$bi)
+ mflo ($alo,$ahi,$bi)
+ mfhi ($ahi,$ahi,$bi)
- $MULTU $nj,$m1
- mflo $lo1
- mfhi $hi1
+ $MULTU ($nj,$m1)
+ mflo ($lo1,$nj,$m1)
+ mfhi ($hi1,$nj,$m1)
- $MULTU $nlo,$m1
+ $MULTU ($nhi,$m1)
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nhi,$m1)
+ mfhi ($nhi,$nhi,$m1)
move $tp,$sp
li $j,2*$BNSZ
@@ -303,19 +305,19 @@ $code.=<<___;
$LD $aj,($aj)
$LD $nj,($nj)
- $MULTU $aj,$bi
+ $MULTU ($aj,$bi)
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
- mflo $alo
- mfhi $ahi
+ mflo ($alo,$aj,$bi)
+ mfhi ($ahi,$aj,$bi)
$ADDU $lo0,$tj
addu $j,$BNSZ
- $MULTU $nj,$m1
+ $MULTU ($nj,$m1)
sltu $at,$lo0,$tj
$ADDU $lo1,$lo0
$ADDU $hi0,$at
@@ -323,8 +325,8 @@ $code.=<<___;
$LD $tj,2*$BNSZ($tp)
$ADDU $hi1,$t0
sltu $at,$j,$num
- mflo $nlo
- mfhi $nhi
+ mflo ($nlo,$nj,$m1)
+ mfhi ($nhi,$nj,$m1)
$ST $lo1,($tp)
bnez $at,.Linner
$PTR_ADD $tp,$BNSZ
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips.pl b/deps/openssl/openssl/crypto/bn/asm/mips.pl
index 420f01f3a4..da35ec1b30 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips.pl
@@ -8,7 +8,7 @@
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project.
#
# Rights for redistribution and usage in source and binary forms are
@@ -42,7 +42,7 @@
# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
# goes way over 3 times faster!
#
-# <appro@fy.chalmers.se>
+# <appro@openssl.org>
# October 2010
#
@@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i);
$minus4=$v1;
$code.=<<___;
+#include "mips_arch.h"
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define ddivu(rs,rt)
+# define mfqt(rd,rs,rt) ddivu rd,rs,rt
+# define mfrm(rd,rs,rt) dmodu rd,rs,rt
+#elif defined(_MIPS_ARCH_MIPS32R6)
+# define divu(rs,rt)
+# define mfqt(rd,rs,rt) divu rd,rs,rt
+# define mfrm(rd,rs,rt) modu rd,rs,rt
+#else
+# define $DIVU(rs,rt) $DIVU $zero,rs,rt
+# define mfqt(rd,rs,rt) mflo rd
+# define mfrm(rd,rs,rt) mfhi rd
+#endif
+
.rdata
.asciiz "mips3.s, Version 1.2"
.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
@@ -151,7 +167,7 @@ $code.=<<___;
.L_bn_mul_add_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,0($a0)
$LD $t2,$BNSZ($a1)
$LD $t3,$BNSZ($a0)
@@ -161,11 +177,11 @@ $code.=<<___;
sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit
# values", but it seems to work fine
# even on 64-bit registers.
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
- $MULTU $t2,$a3
+ $MULTU ($t2,$a3)
sltu $at,$t1,$at
$ST $t1,0($a0)
$ADDU $v0,$at
@@ -174,11 +190,11 @@ $code.=<<___;
$LD $ta3,3*$BNSZ($a0)
$ADDU $t3,$v0
sltu $v0,$t3,$v0
- mflo $at
- mfhi $t2
+ mflo ($at,$t2,$a3)
+ mfhi ($t2,$t2,$a3)
$ADDU $t3,$at
$ADDU $v0,$t2
- $MULTU $ta0,$a3
+ $MULTU ($ta0,$a3)
sltu $at,$t3,$at
$ST $t3,$BNSZ($a0)
$ADDU $v0,$at
@@ -188,11 +204,11 @@ $code.=<<___;
$PTR_ADD $a1,4*$BNSZ
$ADDU $ta1,$v0
sltu $v0,$ta1,$v0
- mflo $at
- mfhi $ta0
+ mflo ($at,$ta0,$a3)
+ mfhi ($ta0,$ta0,$a3)
$ADDU $ta1,$at
$ADDU $v0,$ta0
- $MULTU $ta2,$a3
+ $MULTU ($ta2,$a3)
sltu $at,$ta1,$at
$ST $ta1,-2*$BNSZ($a0)
$ADDU $v0,$at
@@ -201,8 +217,8 @@ $code.=<<___;
and $ta0,$a2,$minus4
$ADDU $ta3,$v0
sltu $v0,$ta3,$v0
- mflo $at
- mfhi $ta2
+ mflo ($at,$ta2,$a3)
+ mfhi ($ta2,$ta2,$a3)
$ADDU $ta3,$at
$ADDU $v0,$ta2
sltu $at,$ta3,$at
@@ -217,13 +233,13 @@ $code.=<<___;
.L_bn_mul_add_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,0($a0)
subu $a2,1
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -232,13 +248,13 @@ $code.=<<___;
beqz $a2,.L_bn_mul_add_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,$BNSZ($a0)
subu $a2,1
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -247,12 +263,12 @@ $code.=<<___;
beqz $a2,.L_bn_mul_add_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t1,2*$BNSZ($a0)
$ADDU $t1,$v0
sltu $v0,$t1,$v0
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $t1,$at
$ADDU $v0,$t0
sltu $at,$t1,$at
@@ -310,40 +326,40 @@ $code.=<<___;
.L_bn_mul_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
$LD $ta2,3*$BNSZ($a1)
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
- $MULTU $t2,$a3
+ $MULTU ($t2,$a3)
$ST $v0,0($a0)
$ADDU $v0,$t1,$t0
subu $a2,4
$PTR_ADD $a0,4*$BNSZ
$PTR_ADD $a1,4*$BNSZ
- mflo $at
- mfhi $t2
+ mflo ($at,$t2,$a3)
+ mfhi ($t2,$t2,$a3)
$ADDU $v0,$at
sltu $t3,$v0,$at
- $MULTU $ta0,$a3
+ $MULTU ($ta0,$a3)
$ST $v0,-3*$BNSZ($a0)
$ADDU $v0,$t3,$t2
- mflo $at
- mfhi $ta0
+ mflo ($at,$ta0,$a3)
+ mfhi ($ta0,$ta0,$a3)
$ADDU $v0,$at
sltu $ta1,$v0,$at
- $MULTU $ta2,$a3
+ $MULTU ($ta2,$a3)
$ST $v0,-2*$BNSZ($a0)
$ADDU $v0,$ta1,$ta0
and $ta0,$a2,$minus4
- mflo $at
- mfhi $ta2
+ mflo ($at,$ta2,$a3)
+ mfhi ($ta2,$ta2,$a3)
$ADDU $v0,$at
sltu $ta3,$v0,$at
$ST $v0,-$BNSZ($a0)
@@ -357,10 +373,10 @@ $code.=<<___;
.L_bn_mul_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
subu $a2,1
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,0($a0)
@@ -368,10 +384,10 @@ $code.=<<___;
beqz $a2,.L_bn_mul_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$a3
+ $MULTU ($t0,$a3)
subu $a2,1
- mflo $at
- mfhi $t0
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,$BNSZ($a0)
@@ -379,9 +395,9 @@ $code.=<<___;
beqz $a2,.L_bn_mul_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$a3
- mflo $at
- mfhi $t0
+ $MULTU ($t0,$a3)
+ mflo ($at,$t0,$a3)
+ mfhi ($t0,$t0,$a3)
$ADDU $v0,$at
sltu $t1,$v0,$at
$ST $v0,2*$BNSZ($a0)
@@ -438,35 +454,35 @@ $code.=<<___;
.L_bn_sqr_words_loop:
$LD $t0,0($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
$LD $ta2,3*$BNSZ($a1)
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,0($a0)
$ST $t0,$BNSZ($a0)
- $MULTU $t2,$t2
+ $MULTU ($t2,$t2)
subu $a2,4
$PTR_ADD $a0,8*$BNSZ
$PTR_ADD $a1,4*$BNSZ
- mflo $t3
- mfhi $t2
+ mflo ($t3,$t2,$t2)
+ mfhi ($t2,$t2,$t2)
$ST $t3,-6*$BNSZ($a0)
$ST $t2,-5*$BNSZ($a0)
- $MULTU $ta0,$ta0
- mflo $ta1
- mfhi $ta0
+ $MULTU ($ta0,$ta0)
+ mflo ($ta1,$ta0,$ta0)
+ mfhi ($ta0,$ta0,$ta0)
$ST $ta1,-4*$BNSZ($a0)
$ST $ta0,-3*$BNSZ($a0)
- $MULTU $ta2,$ta2
+ $MULTU ($ta2,$ta2)
and $ta0,$a2,$minus4
- mflo $ta3
- mfhi $ta2
+ mflo ($ta3,$ta2,$ta2)
+ mfhi ($ta2,$ta2,$ta2)
$ST $ta3,-2*$BNSZ($a0)
.set noreorder
@@ -479,27 +495,27 @@ $code.=<<___;
.L_bn_sqr_words_tail:
.set reorder
$LD $t0,0($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
subu $a2,1
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,0($a0)
$ST $t0,$BNSZ($a0)
beqz $a2,.L_bn_sqr_words_return
$LD $t0,$BNSZ($a1)
- $MULTU $t0,$t0
+ $MULTU ($t0,$t0)
subu $a2,1
- mflo $t1
- mfhi $t0
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,2*$BNSZ($a0)
$ST $t0,3*$BNSZ($a0)
beqz $a2,.L_bn_sqr_words_return
$LD $t0,2*$BNSZ($a1)
- $MULTU $t0,$t0
- mflo $t1
- mfhi $t0
+ $MULTU ($t0,$t0)
+ mflo ($t1,$t0,$t0)
+ mfhi ($t0,$t0,$t0)
$ST $t1,4*$BNSZ($a0)
$ST $t0,5*$BNSZ($a0)
@@ -587,13 +603,13 @@ $code.=<<___;
sltu $v0,$t2,$ta2
$ST $t2,-2*$BNSZ($a0)
$ADDU $v0,$t8
-
+
$ADDU $ta3,$t3
sltu $t9,$ta3,$t3
$ADDU $t3,$ta3,$v0
sltu $v0,$t3,$ta3
$ST $t3,-$BNSZ($a0)
-
+
.set noreorder
bgtz $at,.L_bn_add_words_loop
$ADDU $v0,$t9
@@ -792,7 +808,7 @@ bn_div_3_words:
# so that we can save two arguments
# and return address in registers
# instead of stack:-)
-
+
$LD $a0,($a3)
move $ta2,$a1
bne $a0,$a2,bn_div_3_words_internal
@@ -823,11 +839,11 @@ $code.=<<___;
move $ta3,$ra
bal bn_div_words_internal
move $ra,$ta3
- $MULTU $ta2,$v0
+ $MULTU ($ta2,$v0)
$LD $t2,-2*$BNSZ($a3)
move $ta0,$zero
- mfhi $t1
- mflo $t0
+ mfhi ($t1,$ta2,$v0)
+ mflo ($t0,$ta2,$v0)
sltu $t8,$t1,$a1
.L_bn_div_3_words_inner_loop:
bnez $t8,.L_bn_div_3_words_inner_loop_done
@@ -930,15 +946,15 @@ $code.=<<___;
$SRL $HH,$a0,4*$BNSZ # bits
$SRL $QT,4*$BNSZ # q=0xffffffff
beq $DH,$HH,.L_bn_div_words_skip_div1
- $DIVU $zero,$a0,$DH
- mflo $QT
+ $DIVU ($a0,$DH)
+ mfqt ($QT,$a0,$DH)
.L_bn_div_words_skip_div1:
- $MULTU $a2,$QT
+ $MULTU ($a2,$QT)
$SLL $t3,$a0,4*$BNSZ # bits
$SRL $at,$a1,4*$BNSZ # bits
or $t3,$at
- mflo $t0
- mfhi $t1
+ mflo ($t0,$a2,$QT)
+ mfhi ($t1,$a2,$QT)
.L_bn_div_words_inner_loop1:
sltu $t2,$t3,$t0
seq $t8,$HH,$t1
@@ -963,15 +979,15 @@ $code.=<<___;
$SRL $HH,$a0,4*$BNSZ # bits
$SRL $QT,4*$BNSZ # q=0xffffffff
beq $DH,$HH,.L_bn_div_words_skip_div2
- $DIVU $zero,$a0,$DH
- mflo $QT
+ $DIVU ($a0,$DH)
+ mfqt ($QT,$a0,$DH)
.L_bn_div_words_skip_div2:
- $MULTU $a2,$QT
+ $MULTU ($a2,$QT)
$SLL $t3,$a0,4*$BNSZ # bits
$SRL $at,$a1,4*$BNSZ # bits
or $t3,$at
- mflo $t0
- mfhi $t1
+ mflo ($t0,$a2,$QT)
+ mfhi ($t1,$a2,$QT)
.L_bn_div_words_inner_loop2:
sltu $t2,$t3,$t0
seq $t8,$HH,$t1
@@ -1070,592 +1086,592 @@ $code.=<<___;
$LD $b_0,0($a2)
$LD $a_1,$BNSZ($a1)
$LD $a_2,2*$BNSZ($a1)
- $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_3,3*$BNSZ($a1)
$LD $b_1,$BNSZ($a2)
$LD $b_2,2*$BNSZ($a2)
$LD $b_3,3*$BNSZ($a2)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$b_0)
+ mfhi ($c_2,$a_0,$b_0)
$LD $a_4,4*$BNSZ($a1)
$LD $a_5,5*$BNSZ($a1)
- $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
+ $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1);
$LD $a_6,6*$BNSZ($a1)
$LD $a_7,7*$BNSZ($a1)
$LD $b_4,4*$BNSZ($a2)
$LD $b_5,5*$BNSZ($a2)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_1)
+ mfhi ($t_2,$a_0,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
+ $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1);
$ADDU $c_3,$t_2,$at
$LD $b_6,6*$BNSZ($a2)
$LD $b_7,7*$BNSZ($a2)
$ST $c_1,0($a0) # r[0]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_0)
+ mfhi ($t_2,$a_1,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
$ST $c_2,$BNSZ($a0) # r[1]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_0)
+ mfhi ($t_2,$a_2,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
+ $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_1)
+ mfhi ($t_2,$a_1,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
+ $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_2)
+ mfhi ($t_2,$a_0,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0) # r[2]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_3)
+ mfhi ($t_2,$a_0,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
+ $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_2)
+ mfhi ($t_2,$a_1,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
+ $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_1)
+ mfhi ($t_2,$a_2,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
+ $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_0)
+ mfhi ($t_2,$a_3,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1);
+ $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,3*$BNSZ($a0) # r[3]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_0)
+ mfhi ($t_2,$a_4,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
+ $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_1)
+ mfhi ($t_2,$a_3,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
+ $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_2)
+ mfhi ($t_2,$a_2,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
+ $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_3)
+ mfhi ($t_2,$a_1,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1);
+ $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_4)
+ mfhi ($t_2,$a_0,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2);
+ $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0) # r[4]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_5)
+ mfhi ($t_2,$a_0,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2);
+ $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_4)
+ mfhi ($t_2,$a_1,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_3)
+ mfhi ($t_2,$a_2,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
+ $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_2)
+ mfhi ($t_2,$a_3,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2);
+ $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_1)
+ mfhi ($t_2,$a_4,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2);
+ $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_0)
+ mfhi ($t_2,$a_5,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3);
+ $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,5*$BNSZ($a0) # r[5]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_0)
+ mfhi ($t_2,$a_6,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3);
+ $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_1)
+ mfhi ($t_2,$a_5,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3);
+ $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_2)
+ mfhi ($t_2,$a_4,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
+ $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_3)
+ mfhi ($t_2,$a_3,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3);
+ $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_4)
+ mfhi ($t_2,$a_2,$b_4)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3);
+ $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_5)
+ mfhi ($t_2,$a_1,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3);
+ $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_6)
+ mfhi ($t_2,$a_0,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1);
+ $MULTU ($a_0,$b_7) # mul_add_c(a[0],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,6*$BNSZ($a0) # r[6]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_7)
+ mfhi ($t_2,$a_0,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1);
+ $MULTU ($a_1,$b_6) # mul_add_c(a[1],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_6)
+ mfhi ($t_2,$a_1,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1);
+ $MULTU ($a_2,$b_5) # mul_add_c(a[2],b[5],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_5)
+ mfhi ($t_2,$a_2,$b_5)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1);
+ $MULTU ($a_3,$b_4) # mul_add_c(a[3],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_4)
+ mfhi ($t_2,$a_3,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1);
+ $MULTU ($a_4,$b_3) # mul_add_c(a[4],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_3)
+ mfhi ($t_2,$a_4,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1);
+ $MULTU ($a_5,$b_2) # mul_add_c(a[5],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_2)
+ mfhi ($t_2,$a_5,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1);
+ $MULTU ($a_6,$b_1) # mul_add_c(a[6],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_1)
+ mfhi ($t_2,$a_6,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1);
+ $MULTU ($a_7,$b_0) # mul_add_c(a[7],b[0],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_0)
+ mfhi ($t_2,$a_7,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2);
+ $MULTU ($a_7,$b_1) # mul_add_c(a[7],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,7*$BNSZ($a0) # r[7]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_1)
+ mfhi ($t_2,$a_7,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2);
+ $MULTU ($a_6,$b_2) # mul_add_c(a[6],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_2)
+ mfhi ($t_2,$a_6,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2);
+ $MULTU ($a_5,$b_3) # mul_add_c(a[5],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_3)
+ mfhi ($t_2,$a_5,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2);
+ $MULTU ($a_4,$b_4) # mul_add_c(a[4],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_4)
+ mfhi ($t_2,$a_4,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2);
+ $MULTU ($a_3,$b_5) # mul_add_c(a[3],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_5)
+ mfhi ($t_2,$a_3,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2);
+ $MULTU ($a_2,$b_6) # mul_add_c(a[2],b[6],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_6)
+ mfhi ($t_2,$a_2,$b_6)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2);
+ $MULTU ($a_1,$b_7) # mul_add_c(a[1],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_7)
+ mfhi ($t_2,$a_1,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3);
+ $MULTU ($a_2,$b_7) # mul_add_c(a[2],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,8*$BNSZ($a0) # r[8]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_7)
+ mfhi ($t_2,$a_2,$b_7)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3);
+ $MULTU ($a_3,$b_6) # mul_add_c(a[3],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_6)
+ mfhi ($t_2,$a_3,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3);
+ $MULTU ($a_4,$b_5) # mul_add_c(a[4],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_5)
+ mfhi ($t_2,$a_4,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3);
+ $MULTU ($a_5,$b_4) # mul_add_c(a[5],b[4],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_4)
+ mfhi ($t_2,$a_5,$b_4)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3);
+ $MULTU ($a_6,$b_3) # mul_add_c(a[6],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_3)
+ mfhi ($t_2,$a_6,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3);
+ $MULTU ($a_7,$b_2) # mul_add_c(a[7],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_2)
+ mfhi ($t_2,$a_7,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1);
+ $MULTU ($a_7,$b_3) # mul_add_c(a[7],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,9*$BNSZ($a0) # r[9]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_3)
+ mfhi ($t_2,$a_7,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1);
+ $MULTU ($a_6,$b_4) # mul_add_c(a[6],b[4],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_4)
+ mfhi ($t_2,$a_6,$b_4)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1);
+ $MULTU ($a_5,$b_5) # mul_add_c(a[5],b[5],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_5)
+ mfhi ($t_2,$a_5,$b_5)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1);
+ $MULTU ($a_4,$b_6) # mul_add_c(a[4],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_6)
+ mfhi ($t_2,$a_4,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1);
+ $MULTU ($a_3,$b_7) # mul_add_c(a[3],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_7)
+ mfhi ($t_2,$a_3,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2);
+ $MULTU ($a_4,$b_7) # mul_add_c(a[4],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,10*$BNSZ($a0) # r[10]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_4,$b_7)
+ mfhi ($t_2,$a_4,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2);
+ $MULTU ($a_5,$b_6) # mul_add_c(a[5],b[6],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_6)
+ mfhi ($t_2,$a_5,$b_6)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2);
+ $MULTU ($a_6,$b_5) # mul_add_c(a[6],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_5)
+ mfhi ($t_2,$a_6,$b_5)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2);
+ $MULTU ($a_7,$b_4) # mul_add_c(a[7],b[4],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_4)
+ mfhi ($t_2,$a_7,$b_4)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3);
+ $MULTU ($a_7,$b_5) # mul_add_c(a[7],b[5],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,11*$BNSZ($a0) # r[11]=c3;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_5)
+ mfhi ($t_2,$a_7,$b_5)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3);
+ $MULTU ($a_6,$b_6) # mul_add_c(a[6],b[6],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_6)
+ mfhi ($t_2,$a_6,$b_6)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3);
+ $MULTU ($a_5,$b_7) # mul_add_c(a[5],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_5,$b_7)
+ mfhi ($t_2,$a_5,$b_7)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1);
+ $MULTU ($a_6,$b_7) # mul_add_c(a[6],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,12*$BNSZ($a0) # r[12]=c1;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_6,$b_7)
+ mfhi ($t_2,$a_6,$b_7)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1);
+ $MULTU ($a_7,$b_6) # mul_add_c(a[7],b[6],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_6)
+ mfhi ($t_2,$a_7,$b_6)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2);
+ $MULTU ($a_7,$b_7) # mul_add_c(a[7],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,13*$BNSZ($a0) # r[13]=c2;
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_7,$b_7)
+ mfhi ($t_2,$a_7,$b_7)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
$ADDU $t_2,$at
@@ -1716,144 +1732,144 @@ $code.=<<___;
$LD $b_0,0($a2)
$LD $a_1,$BNSZ($a1)
$LD $a_2,2*$BNSZ($a1)
- $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_3,3*$BNSZ($a1)
$LD $b_1,$BNSZ($a2)
$LD $b_2,2*$BNSZ($a2)
$LD $b_3,3*$BNSZ($a2)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$b_0)
+ mfhi ($c_2,$a_0,$b_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$b_1)
+ mfhi ($t_2,$a_0,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
+ $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1);
$ADDU $c_3,$t_2,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_0)
+ mfhi ($t_2,$a_1,$b_0)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
$ST $c_2,$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_0)
+ mfhi ($t_2,$a_2,$b_0)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
+ $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_1)
+ mfhi ($t_2,$a_1,$b_1)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
+ $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_2)
+ mfhi ($t_2,$a_0,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_0,$b_3)
+ mfhi ($t_2,$a_0,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
+ $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $c_3,$c_2,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_2)
+ mfhi ($t_2,$a_1,$b_2)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
+ $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_1)
+ mfhi ($t_2,$a_2,$b_1)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
+ $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_0)
+ mfhi ($t_2,$a_3,$b_0)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
+ $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,3*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_1)
+ mfhi ($t_2,$a_3,$b_1)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
+ $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $c_1,$c_3,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_2)
+ mfhi ($t_2,$a_2,$b_2)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
+ $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_1,$b_3)
+ mfhi ($t_2,$a_1,$b_3)
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_2,$b_3)
+ mfhi ($t_2,$a_2,$b_3)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
+ $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $c_2,$c_1,$t_2
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_2)
+ mfhi ($t_2,$a_3,$b_2)
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
+ $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,5*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
+ mflo ($t_1,$a_3,$b_3)
+ mfhi ($t_2,$a_3,$b_3)
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
$ADDU $t_2,$at
@@ -1888,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2,
# commented as "forward multiplication" below];
)=@_;
$code.=<<___;
- mflo $lo
- mfhi $hi
$ADDU $c0,$lo
sltu $at,$c0,$lo
- $MULTU $an,$bn # forward multiplication
+ $MULTU ($an,$bn) # forward multiplication
$ADDU $c0,$lo
$ADDU $at,$hi
sltu $lo,$c0,$lo
@@ -1902,15 +1916,17 @@ ___
$code.=<<___ if (!$warm);
sltu $c2,$c1,$at
$ADDU $c1,$hi
- sltu $hi,$c1,$hi
- $ADDU $c2,$hi
___
$code.=<<___ if ($warm);
sltu $at,$c1,$at
$ADDU $c1,$hi
$ADDU $c2,$at
+___
+$code.=<<___;
sltu $hi,$c1,$hi
$ADDU $c2,$hi
+ mflo ($lo,$an,$bn)
+ mfhi ($hi,$an,$bn)
___
}
@@ -1940,21 +1956,21 @@ $code.=<<___;
$LD $a_2,2*$BNSZ($a1)
$LD $a_3,3*$BNSZ($a1)
- $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_4,4*$BNSZ($a1)
$LD $a_5,5*$BNSZ($a1)
$LD $a_6,6*$BNSZ($a1)
$LD $a_7,7*$BNSZ($a1)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$a_0)
+ mfhi ($c_2,$a_0,$a_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$a_1)
+ mfhi ($t_2,$a_0,$a_1)
slt $c_1,$t_2,$zero
$SLL $t_2,1
- $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $a2,$t_1,$zero
$ADDU $t_2,$a2
$SLL $t_1,1
@@ -1962,20 +1978,22 @@ $code.=<<___;
sltu $at,$c_2,$t_1
$ADDU $c_3,$t_2,$at
$ST $c_2,$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_0)
+ mfhi ($t_2,$a_2,$a_0)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_3)
+ mfhi ($t_2,$a_0,$a_3)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
@@ -1989,16 +2007,16 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
$a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2);
+ $MULTU ($a_0,$a_5) # mul_add_c2(a[0],b[5],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_5)
+ mfhi ($t_2,$a_0,$a_5)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
@@ -2016,16 +2034,16 @@ ___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
$a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1);
+ $MULTU ($a_0,$a_7) # mul_add_c2(a[0],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,6*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_7)
+ mfhi ($t_2,$a_0,$a_7)
___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
@@ -2045,16 +2063,16 @@ ___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
$a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3);
+ $MULTU ($a_2,$a_7) # mul_add_c2(a[2],b[7],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,8*$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_7)
+ mfhi ($t_2,$a_2,$a_7)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
@@ -2070,16 +2088,16 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
$a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2);
+ $MULTU ($a_4,$a_7) # mul_add_c2(a[4],b[7],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,10*$BNSZ($a0)
+ mflo ($t_1,$a_4,$a_7)
+ mfhi ($t_2,$a_4,$a_7)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
@@ -2091,24 +2109,22 @@ ___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
- $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1);
+ $MULTU ($a_6,$a_7) # mul_add_c2(a[6],b[7],c2,c3,c1);
$ADDU $t_2,$at
$ADDU $c_2,$t_2
sltu $at,$c_2,$t_2
$ADDU $c_3,$at
$ST $c_1,12*$BNSZ($a0)
+ mflo ($t_1,$a_6,$a_7)
+ mfhi ($t_2,$a_6,$a_7)
___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
$code.=<<___;
$ST $c_2,13*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
$ADDU $t_2,$at
@@ -2152,19 +2168,19 @@ $code.=<<___;
.set reorder
$LD $a_0,0($a1)
$LD $a_1,$BNSZ($a1)
- $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
+ $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3);
$LD $a_2,2*$BNSZ($a1)
$LD $a_3,3*$BNSZ($a1)
- mflo $c_1
- mfhi $c_2
+ mflo ($c_1,$a_0,$a_0)
+ mfhi ($c_2,$a_0,$a_0)
$ST $c_1,0($a0)
- $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
- mflo $t_1
- mfhi $t_2
+ $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($t_1,$a_0,$a_1)
+ mfhi ($t_2,$a_0,$a_1)
slt $c_1,$t_2,$zero
$SLL $t_2,1
- $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
+ $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2);
slt $a2,$t_1,$zero
$ADDU $t_2,$a2
$SLL $t_1,1
@@ -2172,20 +2188,22 @@ $code.=<<___;
sltu $at,$c_2,$t_1
$ADDU $c_3,$t_2,$at
$ST $c_2,$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_0)
+ mfhi ($t_2,$a_2,$a_0)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_3,$t_1
sltu $at,$c_3,$t_1
- $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
+ $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3);
$ADDU $t_2,$at
$ADDU $c_1,$t_2
sltu $at,$c_1,$t_2
$ADDU $c_2,$at
$ST $c_3,2*$BNSZ($a0)
+ mflo ($t_1,$a_0,$a_3)
+ mfhi ($t_2,$a_0,$a_3)
___
&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
$a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3);
@@ -2197,24 +2215,22 @@ ___
&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
$a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
$code.=<<___;
- mflo $t_1
- mfhi $t_2
$ADDU $c_2,$t_1
sltu $at,$c_2,$t_1
- $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
+ $MULTU ($a_2,$a_3) # mul_add_c2(a[2],b[3],c3,c1,c2);
$ADDU $t_2,$at
$ADDU $c_3,$t_2
sltu $at,$c_3,$t_2
$ADDU $c_1,$at
$ST $c_2,4*$BNSZ($a0)
+ mflo ($t_1,$a_2,$a_3)
+ mfhi ($t_2,$a_2,$a_3)
___
&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
$a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
$code.=<<___;
$ST $c_3,5*$BNSZ($a0)
- mflo $t_1
- mfhi $t_2
$ADDU $c_1,$t_1
sltu $at,$c_1,$t_1
$ADDU $t_2,$at
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
deleted file mode 100644
index 413eac7123..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,1624 +0,0 @@
-; Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
-;
-; Licensed under the OpenSSL license (the "License"). You may not use
-; this file except in compliance with the License. You can obtain a copy
-; in the file LICENSE in the source distribution or at
-; https://www.openssl.org/source/license.html
-;
-; PA-RISC 2.0 implementation of bn_asm code, based on the
-; 64-bit version of the code. This code is effectively the
-; same as the 64-bit version except the register model is
-; slightly different given all values must be 32-bit between
-; function calls. Thus the 64-bit return values are returned
-; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
-;
-;
-; This code is approximately 2x faster than the C version
-; for RSA/DSA.
-;
-; See http://devresource.hp.com/ for more details on the PA-RISC
-; architecture. Also see the book "PA-RISC 2.0 Architecture"
-; by Gerry Kane for information on the instruction set architecture.
-;
-; Code written by Chris Ruemmler (with some help from the HP C
-; compiler).
-;
-; The code compiles with HP's assembler
-;
-
- .level 2.0N
- .space $TEXT$
- .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
-
-;
-; Global Register definitions used for the routines.
-;
-; Some information about HP's runtime architecture for 32-bits.
-;
-; "Caller save" means the calling function must save the register
-; if it wants the register to be preserved.
-; "Callee save" means if a function uses the register, it must save
-; the value before using it.
-;
-; For the floating point registers
-;
-; "caller save" registers: fr4-fr11, fr22-fr31
-; "callee save" registers: fr12-fr21
-; "special" registers: fr0-fr3 (status and exception registers)
-;
-; For the integer registers
-; value zero : r0
-; "caller save" registers: r1,r19-r26
-; "callee save" registers: r3-r18
-; return register : r2 (rp)
-; return values ; r28,r29 (ret0,ret1)
-; Stack pointer ; r30 (sp)
-; millicode return ptr ; r31 (also a caller save register)
-
-
-;
-; Arguments to the routines
-;
-r_ptr .reg %r26
-a_ptr .reg %r25
-b_ptr .reg %r24
-num .reg %r24
-n .reg %r23
-
-;
-; Note that the "w" argument for bn_mul_add_words and bn_mul_words
-; is passed on the stack at a delta of -56 from the top of stack
-; as the routine is entered.
-;
-
-;
-; Globals used in some routines
-;
-
-top_overflow .reg %r23
-high_mask .reg %r22 ; value 0xffffffff80000000L
-
-
-;------------------------------------------------------------------------------
-;
-; bn_mul_add_words
-;
-;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
-; int num, BN_ULONG w)
-;
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg3 = num
-; -56(sp) = w
-;
-; Local register definitions
-;
-
-fm1 .reg %fr22
-fm .reg %fr23
-ht_temp .reg %fr24
-ht_temp_1 .reg %fr25
-lt_temp .reg %fr26
-lt_temp_1 .reg %fr27
-fm1_1 .reg %fr28
-fm_1 .reg %fr29
-
-fw_h .reg %fr7L
-fw_l .reg %fr7R
-fw .reg %fr7
-
-fht_0 .reg %fr8L
-flt_0 .reg %fr8R
-t_float_0 .reg %fr8
-
-fht_1 .reg %fr9L
-flt_1 .reg %fr9R
-t_float_1 .reg %fr9
-
-tmp_0 .reg %r31
-tmp_1 .reg %r21
-m_0 .reg %r20
-m_1 .reg %r19
-ht_0 .reg %r1
-ht_1 .reg %r3
-lt_0 .reg %r4
-lt_1 .reg %r5
-m1_0 .reg %r6
-m1_1 .reg %r7
-rp_val .reg %r8
-rp_val_1 .reg %r9
-
-bn_mul_add_words
- .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
- .proc
- .callinfo frame=128
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- NOP ; Needed to make the loop 16-byte aligned
- NOP ; needed to make the loop 16-byte aligned
-
- STD %r5,16(%sp) ; save r5
- NOP
- STD %r6,24(%sp) ; save r6
- STD %r7,32(%sp) ; save r7
-
- STD %r8,40(%sp) ; save r8
- STD %r9,48(%sp) ; save r9
- COPY %r0,%ret1 ; return 0 by default
- DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
-
- CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
- ; The loop is unrolled twice, so if there is only 1 number
- ; then go straight to the cleanup code.
- ;
- CMPIB,= 1,num,bn_mul_add_words_single_top
- FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
- ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-	; two 32-bit multiplies can be issued per cycle.
- ;
-bn_mul_add_words_unroll2
-
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- LDD 0(r_ptr),rp_val ; rp[0]
- LDD 8(r_ptr),rp_val_1 ; rp[1]
-
- XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
- XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
- FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
-
- XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
- XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m[0]
- FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
-
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
- XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
- FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
-
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
- FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
-
- LDD -8(%sp),m_0 ; m[0]
- LDD -40(%sp),m_1 ; m[1]
- LDD -16(%sp),m1_0 ; m1[0]
- LDD -48(%sp),m1_1 ; m1[1]
-
- LDD -24(%sp),ht_0 ; ht[0]
- LDD -56(%sp),ht_1 ; ht[1]
- ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
- ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
-
- LDD -32(%sp),lt_0
- LDD -64(%sp),lt_1
- CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
- ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
-
- CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
- ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
- EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
-
- EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
- DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
- ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
- ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
-
- ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
-
- ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c;
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
-
- LDO -2(num),num ; num = num - 2;
- ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
- STD lt_0,0(r_ptr) ; rp[0] = lt[0]
-
- ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
- ADD,DC ht_1,%r0,%ret1 ; ht[1]++
- LDO 16(a_ptr),a_ptr ; a_ptr += 2
-
- STD lt_1,8(r_ptr) ; rp[1] = lt[1]
- CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
- LDO 16(r_ptr),r_ptr ; r_ptr += 2
-
- CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_mul_add_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- LDD 0(r_ptr),rp_val ; rp[0]
- LDO 8(a_ptr),a_ptr ; a_ptr++
- XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
-
- LDD -8(%sp),m_0
- LDD -16(%sp),m1_0 ; m1 = temp1
- ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
- LDD -24(%sp),ht_0
- LDD -32(%sp),lt_0
-
- CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
- ADD %ret1,tmp_0,lt_0 ; lt = lt + c;
- ADD,DC ht_0,%r0,ht_0 ; ht++
- ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
- ADD,DC ht_0,%r0,%ret1 ; ht++
- STD lt_0,0(r_ptr) ; rp[0] = lt
-
-bn_mul_add_words_exit
- .EXIT
-
- EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
- LDD -80(%sp),%r9 ; restore r9
- LDD -88(%sp),%r8 ; restore r8
- LDD -96(%sp),%r7 ; restore r7
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3 ; restore r3
- .PROCEND ;in=23,24,25,26,29;out=28;
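
The carry chains above are easier to follow next to a plain C rendering of what bn_mul_add_words computes: r[i] becomes the low 64 bits of a[i]*w + r[i] + carry, and the final carry is returned. This is only an illustrative sketch, not part of the deleted file or of OpenSSL's API: the names mul64 and bn_mul_add_words_ref are hypothetical, BN_ULONG is assumed to be a 64-bit unsigned integer, and the 128-bit product is assembled from 32x32-bit partial products, the same decomposition the XMPYU instructions rely on.

#include <stdint.h>

typedef uint64_t BN_ULONG;               /* assumed 64-bit here           */

/* hi:lo = a * w, built from four 32x32-bit partial products. */
static void mul64(BN_ULONG a, BN_ULONG w, BN_ULONG *hi, BN_ULONG *lo)
{
    BN_ULONG ah = a >> 32, al = a & 0xffffffffU;
    BN_ULONG wh = w >> 32, wl = w & 0xffffffffU;
    BN_ULONG ht = ah * wh;               /* high partial product          */
    BN_ULONG lt = al * wl;               /* low  partial product          */
    BN_ULONG m1 = ah * wl;               /* cross products ("m1" and "m") */
    BN_ULONG m  = al * wh;

    m += m1;
    if (m < m1)
        ht += (BN_ULONG)1 << 32;         /* cross sum overflowed 2^64     */
    ht += m >> 32;                       /* top half of the cross sum     */
    m <<= 32;                            /* bottom half of the cross sum  */
    lt += m;
    if (lt < m)
        ht++;                            /* carry into the high word      */
    *hi = ht;
    *lo = lt;
}

BN_ULONG bn_mul_add_words_ref(BN_ULONG *r, const BN_ULONG *a,
                              int num, BN_ULONG w)
{
    BN_ULONG c = 0;                      /* running carry                 */

    while (num-- > 0) {
        BN_ULONG hi, lo;

        mul64(*a++, w, &hi, &lo);        /* hi:lo = a[i] * w              */
        lo += c;
        if (lo < c)
            hi++;                        /* carry from adding c           */
        lo += *r;
        if (lo < *r)
            hi++;                        /* carry from adding r[i]        */
        *r++ = lo;
        c = hi;                          /* carry out to the next word    */
    }
    return c;
}

The assembly performs exactly this per word, just two words at a time and with the partial products staged through the stack so the FPU multipliers and the integer carry chain can overlap.
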
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
-;
-; arg0 = rp
-; arg1 = ap
-; arg3 = num
-; w on stack at -56(sp)
-
-bn_mul_words
- .proc
- .callinfo frame=128
- .entry
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- NOP
- STD %r5,16(%sp) ; save r5
-
- STD %r6,24(%sp) ; save r6
- STD %r7,32(%sp) ; save r7
- COPY %r0,%ret1 ; return 0 by default
- DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
-
- CMPIB,>= 0,num,bn_mul_words_exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
- ; See if only 1 word to do, thus just do cleanup
- ;
- CMPIB,= 1,num,bn_mul_words_single_top
- FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l)
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
- ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-	; two 32-bit multiplies can be issued per cycle.
- ;
-bn_mul_words_unroll2
-
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
- XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
-
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- FSTD fm1_1,-48(%sp) ; -48(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
-
- FSTD fm,-8(%sp) ; -8(sp) = m
- FSTD fm_1,-40(%sp) ; -40(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
- XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
-
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
-
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
- FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
- LDD -8(%sp),m_0
- LDD -40(%sp),m_1
-
- LDD -16(%sp),m1_0
- LDD -48(%sp),m1_1
- LDD -24(%sp),ht_0
- LDD -56(%sp),ht_1
-
- ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
- ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
- LDD -32(%sp),lt_0
- LDD -64(%sp),lt_1
-
- CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
- CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
- ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
- EXTRD,U tmp_1,31,32,m_1 ; m>>32
- DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
- ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
- ADD,DC ht_1,%r0,ht_1 ; ht++
- ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1);
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
- ADD,DC ht_1,%r0,ht_1 ; ht++
- STD lt_0,0(r_ptr) ; rp[0] = lt
- STD lt_1,8(r_ptr) ; rp[1] = lt
-
- COPY ht_1,%ret1 ; carry = ht
- LDO -2(num),num ; num = num - 2;
- LDO 16(a_ptr),a_ptr ; ap += 2
- CMPIB,<= 2,num,bn_mul_words_unroll2
- LDO 16(r_ptr),r_ptr ; rp++
-
- CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_mul_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
- XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
-
- LDD -8(%sp),m_0
- LDD -16(%sp),m1_0
- ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
- LDD -24(%sp),ht_0
- LDD -32(%sp),lt_0
-
- CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD %ret1,lt_0,lt_0 ; lt = lt + c;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- COPY ht_0,%ret1 ; copy carry
- STD lt_0,0(r_ptr) ; rp[0] = lt
-
-bn_mul_words_exit
- .EXIT
- EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
- LDD -96(%sp),%r7 ; restore r7
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3 ; restore r3
- .PROCEND
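
bn_mul_words is the same loop without the read back of r[i]. A minimal sketch (hypothetical name bn_mul_words_ref, 64-bit BN_ULONG assumed, and unsigned __int128 used as a compiler extension in place of the explicit 32x32 split):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

BN_ULONG bn_mul_words_ref(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
{
    BN_ULONG c = 0;

    while (num-- > 0) {
        unsigned __int128 t = (unsigned __int128)(*a++) * w + c;

        *r++ = (BN_ULONG)t;             /* low 64 bits   */
        c = (BN_ULONG)(t >> 64);        /* carry out     */
    }
    return c;
}
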
-
-;----------------------------------------------------------------------------
-;
-;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-;
-
-bn_sqr_words
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- NOP
- STD %r5,16(%sp) ; save r5
-
- CMPIB,>= 0,num,bn_sqr_words_exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
-	; If only 1, then go straight to cleanup
- ;
- CMPIB,= 1,num,bn_sqr_words_single_top
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-
-bn_sqr_words_unroll2
- FLDD 0(a_ptr),t_float_0 ; a[0]
- FLDD 8(a_ptr),t_float_1 ; a[1]
- XMPYU fht_0,flt_0,fm ; m[0]
- XMPYU fht_1,flt_1,fm_1 ; m[1]
-
- FSTD fm,-24(%sp) ; store m[0]
- FSTD fm_1,-56(%sp) ; store m[1]
- XMPYU flt_0,flt_0,lt_temp ; lt[0]
- XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
-
- FSTD lt_temp,-16(%sp) ; store lt[0]
- FSTD lt_temp_1,-48(%sp) ; store lt[1]
- XMPYU fht_0,fht_0,ht_temp ; ht[0]
- XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
-
- FSTD ht_temp,-8(%sp) ; store ht[0]
- FSTD ht_temp_1,-40(%sp) ; store ht[1]
- LDD -24(%sp),m_0
- LDD -56(%sp),m_1
-
- AND m_0,high_mask,tmp_0 ; m[0] & Mask
- AND m_1,high_mask,tmp_1 ; m[1] & Mask
- DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
- DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
-
- LDD -16(%sp),lt_0
- LDD -48(%sp),lt_1
- EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
- EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
-
- LDD -8(%sp),ht_0
- LDD -40(%sp),ht_1
- ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
- ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
-
- ADD lt_0,m_0,lt_0 ; lt = lt+m
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- STD lt_0,0(r_ptr) ; rp[0] = lt[0]
- STD ht_0,8(r_ptr) ; rp[1] = ht[1]
-
- ADD lt_1,m_1,lt_1 ; lt = lt+m
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
- STD lt_1,16(r_ptr) ; rp[2] = lt[1]
- STD ht_1,24(r_ptr) ; rp[3] = ht[1]
-
- LDO -2(num),num ; num = num - 2;
- LDO 16(a_ptr),a_ptr ; ap += 2
- CMPIB,<= 2,num,bn_sqr_words_unroll2
- LDO 32(r_ptr),r_ptr ; rp += 4
-
- CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_sqr_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
- XMPYU fht_0,flt_0,fm ; m
- FSTD fm,-24(%sp) ; store m
-
- XMPYU flt_0,flt_0,lt_temp ; lt
- FSTD lt_temp,-16(%sp) ; store lt
-
- XMPYU fht_0,fht_0,ht_temp ; ht
- FSTD ht_temp,-8(%sp) ; store ht
-
- LDD -24(%sp),m_0 ; load m
- AND m_0,high_mask,tmp_0 ; m & Mask
- DEPD,Z m_0,30,31,m_0 ; m << 32+1
- LDD -16(%sp),lt_0 ; lt
-
- LDD -8(%sp),ht_0 ; ht
- EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
- ADD m_0,lt_0,lt_0 ; lt = lt+m
- ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- STD lt_0,0(r_ptr) ; rp[0] = lt
- STD ht_0,8(r_ptr) ; rp[1] = ht
-
-bn_sqr_words_exit
- .EXIT
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
- .PROCEND ;in=23,24,25,26,29;out=28;
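
bn_sqr_words, as the comments above describe, writes the full 128-bit square of each input word into two consecutive output words, low word first. A minimal sketch under the same assumptions (hypothetical name bn_sqr_words_ref, 64-bit BN_ULONG, unsigned __int128 extension for the multiply):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

void bn_sqr_words_ref(BN_ULONG *r, const BN_ULONG *a, int n)
{
    while (n-- > 0) {
        unsigned __int128 s = (unsigned __int128)(*a) * (*a);

        r[0] = (BN_ULONG)s;             /* low word first  */
        r[1] = (BN_ULONG)(s >> 64);     /* then high word  */
        a++;
        r += 2;
    }
}
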
-
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = bp
-; arg3 = n
-
-t .reg %r22
-b .reg %r21
-l .reg %r20
-
-bn_add_words
- .proc
- .entry
- .callinfo
- .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .align 64
-
- CMPIB,>= 0,n,bn_add_words_exit
- COPY %r0,%ret1 ; return 0 by default
-
- ;
- ; If 2 or more numbers do the loop
- ;
- CMPIB,= 1,n,bn_add_words_single_top
- NOP
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-bn_add_words_unroll2
- LDD 0(a_ptr),t
- LDD 0(b_ptr),b
- ADD t,%ret1,t ; t = t+c;
- ADD,DC %r0,%r0,%ret1 ; set c to carry
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret1,%r0,%ret1 ; c+= carry
- STD l,0(r_ptr)
-
- LDD 8(a_ptr),t
- LDD 8(b_ptr),b
- ADD t,%ret1,t ; t = t+c;
- ADD,DC %r0,%r0,%ret1 ; set c to carry
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret1,%r0,%ret1 ; c+= carry
- STD l,8(r_ptr)
-
- LDO -2(n),n
- LDO 16(a_ptr),a_ptr
- LDO 16(b_ptr),b_ptr
-
- CMPIB,<= 2,n,bn_add_words_unroll2
- LDO 16(r_ptr),r_ptr
-
- CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
-
-bn_add_words_single_top
- LDD 0(a_ptr),t
- LDD 0(b_ptr),b
-
- ADD t,%ret1,t ; t = t+c;
- ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??)
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret1,%r0,%ret1 ; c+= carry
- STD l,0(r_ptr)
-
-bn_add_words_exit
- .EXIT
- BVE (%rp)
- EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
- .PROCEND ;in=23,24,25,26,29;out=28;
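
The ADD / ADD,DC pairs above implement a word-by-word add with carry propagation. A rough, non-authoritative C equivalent (hypothetical name bn_add_words_ref, 64-bit BN_ULONG assumed):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

BN_ULONG bn_add_words_ref(BN_ULONG *r, const BN_ULONG *a,
                          const BN_ULONG *b, int n)
{
    BN_ULONG c = 0;

    while (n-- > 0) {
        BN_ULONG ai = *a++, bi = *b++;
        BN_ULONG t = ai + c;

        c = (t < c);                    /* carry from adding c       */
        t += bi;
        c += (t < bi);                  /* carry from adding b[i]    */
        *r++ = t;
    }
    return c;
}
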
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = bp
-; arg3 = n
-
-t1 .reg %r22
-t2 .reg %r21
-sub_tmp1 .reg %r20
-sub_tmp2 .reg %r19
-
-
-bn_sub_words
- .proc
- .callinfo
- .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- CMPIB,>= 0,n,bn_sub_words_exit
- COPY %r0,%ret1 ; return 0 by default
-
- ;
- ; If 2 or more numbers do the loop
- ;
- CMPIB,= 1,n,bn_sub_words_single_top
- NOP
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-bn_sub_words_unroll2
- LDD 0(a_ptr),t1
- LDD 0(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
-
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret1
- STD sub_tmp1,0(r_ptr)
-
- LDD 8(a_ptr),t1
- LDD 8(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret1
- STD sub_tmp1,8(r_ptr)
-
- LDO -2(n),n
- LDO 16(a_ptr),a_ptr
- LDO 16(b_ptr),b_ptr
-
- CMPIB,<= 2,n,bn_sub_words_unroll2
- LDO 16(r_ptr),r_ptr
-
- CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
-
-bn_sub_words_single_top
- LDD 0(a_ptr),t1
- LDD 0(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c;
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret1
-
- STD sub_tmp1,0(r_ptr)
-
-bn_sub_words_exit
- .EXIT
- BVE (%rp)
- EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1
- .PROCEND ;in=23,24,25,26,29;out=28;
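
The CMPCLR,*>> / CMPCLR,*= sequence above selects the outgoing borrow: clear it when a[i] > b[i], set it when a[i] < b[i], and keep the incoming borrow when the words are equal. A sketch of the same logic (hypothetical name bn_sub_words_ref, 64-bit BN_ULONG assumed):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

BN_ULONG bn_sub_words_ref(BN_ULONG *r, const BN_ULONG *a,
                          const BN_ULONG *b, int n)
{
    BN_ULONG borrow = 0;

    while (n-- > 0) {
        BN_ULONG ai = *a++, bi = *b++;
        BN_ULONG t = ai - bi - borrow;

        /* Borrow out unless a > b, or a == b with no borrow coming in. */
        borrow = (ai < bi) || (ai == bi && borrow);
        *r++ = t;
    }
    return borrow;
}
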
-
-;------------------------------------------------------------------------------
-;
-; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
-;
-; arg0 = h
-; arg1 = l
-; arg2 = d
-;
-; This is mainly just output from the HP C compiler.
-;
-;------------------------------------------------------------------------------
-bn_div_words
- .PROC
- .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
- .IMPORT BN_num_bits_word,CODE
- ;--- not PIC .IMPORT __iob,DATA
- ;--- not PIC .IMPORT fprintf,CODE
- .IMPORT abort,CODE
- .IMPORT $$div2U,MILLICODE
- .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
- .ENTRY
- STW %r2,-20(%r30) ;offset 0x8ec
- STW,MA %r3,192(%r30) ;offset 0x8f0
- STW %r4,-188(%r30) ;offset 0x8f4
- DEPD %r5,31,32,%r6 ;offset 0x8f8
- STD %r6,-184(%r30) ;offset 0x8fc
- DEPD %r7,31,32,%r8 ;offset 0x900
- STD %r8,-176(%r30) ;offset 0x904
- STW %r9,-168(%r30) ;offset 0x908
- LDD -248(%r30),%r3 ;offset 0x90c
- COPY %r26,%r4 ;offset 0x910
- COPY %r24,%r5 ;offset 0x914
- DEPD %r25,31,32,%r4 ;offset 0x918
- CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c
- DEPD %r23,31,32,%r5 ;offset 0x920
- MOVIB,TR -1,%r29,$00060002 ;offset 0x924
- EXTRD,U %r29,31,32,%r28 ;offset 0x928
-$0006002A
- LDO -1(%r29),%r29 ;offset 0x92c
- SUB %r23,%r7,%r23 ;offset 0x930
-$00060024
- SUB %r4,%r31,%r25 ;offset 0x934
- AND %r25,%r19,%r26 ;offset 0x938
- CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c
- DEPD,Z %r25,31,32,%r20 ;offset 0x940
- OR %r20,%r24,%r21 ;offset 0x944
- CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948
- SUB %r31,%r2,%r31 ;offset 0x94c
-$00060046
-$0006002E
- DEPD,Z %r23,31,32,%r25 ;offset 0x950
- EXTRD,U %r23,31,32,%r26 ;offset 0x954
- AND %r25,%r19,%r24 ;offset 0x958
- ADD,L %r31,%r26,%r31 ;offset 0x95c
- CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960
- LDO 1(%r31),%r31 ;offset 0x964
-$00060032
- CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968
- LDO -1(%r29),%r29 ;offset 0x96c
- ADD,L %r4,%r3,%r4 ;offset 0x970
-$00060036
- ADDIB,=,N -1,%r8,$D0 ;offset 0x974
- SUB %r5,%r24,%r28 ;offset 0x978
-$0006003A
- SUB %r4,%r31,%r24 ;offset 0x97c
- SHRPD %r24,%r28,32,%r4 ;offset 0x980
- DEPD,Z %r29,31,32,%r9 ;offset 0x984
- DEPD,Z %r28,31,32,%r5 ;offset 0x988
-$0006001C
- EXTRD,U %r4,31,32,%r31 ;offset 0x98c
- CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990
- MOVB,TR %r6,%r29,$D1 ;offset 0x994
- STD %r29,-152(%r30) ;offset 0x998
-$0006000C
- EXTRD,U %r3,31,32,%r25 ;offset 0x99c
- COPY %r3,%r26 ;offset 0x9a0
- EXTRD,U %r3,31,32,%r9 ;offset 0x9a4
- EXTRD,U %r4,31,32,%r8 ;offset 0x9a8
- .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28;
- B,L BN_num_bits_word,%r2 ;offset 0x9ac
- EXTRD,U %r5,31,32,%r7 ;offset 0x9b0
- LDI 64,%r20 ;offset 0x9b4
- DEPD %r7,31,32,%r5 ;offset 0x9b8
- DEPD %r8,31,32,%r4 ;offset 0x9bc
- DEPD %r9,31,32,%r3 ;offset 0x9c0
- CMPB,= %r28,%r20,$00060012 ;offset 0x9c4
- COPY %r28,%r24 ;offset 0x9c8
- MTSARCM %r24 ;offset 0x9cc
- DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0
- CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4
-$00060012
- SUBI 64,%r24,%r31 ;offset 0x9d8
- CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc
- SUB %r4,%r3,%r4 ;offset 0x9e0
-$00060016
- CMPB,= %r31,%r0,$0006001A ;offset 0x9e4
- COPY %r0,%r9 ;offset 0x9e8
- MTSARCM %r31 ;offset 0x9ec
- DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0
- SUBI 64,%r31,%r26 ;offset 0x9f4
- MTSAR %r26 ;offset 0x9f8
- SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc
- MTSARCM %r31 ;offset 0xa00
- DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04
-$0006001A
- DEPDI,Z -1,31,32,%r19 ;offset 0xa08
- AND %r3,%r19,%r29 ;offset 0xa0c
- EXTRD,U %r29,31,32,%r2 ;offset 0xa10
- DEPDI,Z -1,63,32,%r6 ;offset 0xa14
- MOVIB,TR 2,%r8,$0006001C ;offset 0xa18
- EXTRD,U %r3,63,32,%r7 ;offset 0xa1c
-$D2
- ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20
- ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24
- ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28
- ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28;
- ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c
- ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30
- .CALL ;
- B,L abort,%r2 ;offset 0xa34
- NOP ;offset 0xa38
- B $D3 ;offset 0xa3c
- LDW -212(%r30),%r2 ;offset 0xa40
-$00060020
- COPY %r4,%r26 ;offset 0xa44
- EXTRD,U %r4,31,32,%r25 ;offset 0xa48
- COPY %r2,%r24 ;offset 0xa4c
- .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
- B,L $$div2U,%r31 ;offset 0xa50
- EXTRD,U %r2,31,32,%r23 ;offset 0xa54
- DEPD %r28,31,32,%r29 ;offset 0xa58
-$00060022
- STD %r29,-152(%r30) ;offset 0xa5c
-$D1
- AND %r5,%r19,%r24 ;offset 0xa60
- EXTRD,U %r24,31,32,%r24 ;offset 0xa64
- STW %r2,-160(%r30) ;offset 0xa68
- STW %r7,-128(%r30) ;offset 0xa6c
- FLDD -152(%r30),%fr4 ;offset 0xa70
- FLDD -152(%r30),%fr7 ;offset 0xa74
- FLDW -160(%r30),%fr8L ;offset 0xa78
- FLDW -128(%r30),%fr5L ;offset 0xa7c
- XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80
- FSTD %fr10,-136(%r30) ;offset 0xa84
- XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88
- FSTD %fr22,-144(%r30) ;offset 0xa8c
- XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90
- XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94
- FSTD %fr11,-112(%r30) ;offset 0xa98
- FSTD %fr23,-120(%r30) ;offset 0xa9c
- LDD -136(%r30),%r28 ;offset 0xaa0
- DEPD,Z %r28,31,32,%r31 ;offset 0xaa4
- LDD -144(%r30),%r20 ;offset 0xaa8
- ADD,L %r20,%r31,%r31 ;offset 0xaac
- LDD -112(%r30),%r22 ;offset 0xab0
- DEPD,Z %r22,31,32,%r22 ;offset 0xab4
- LDD -120(%r30),%r21 ;offset 0xab8
- B $00060024 ;offset 0xabc
- ADD,L %r21,%r22,%r23 ;offset 0xac0
-$D0
- OR %r9,%r29,%r29 ;offset 0xac4
-$00060040
- EXTRD,U %r29,31,32,%r28 ;offset 0xac8
-$00060002
-$L2
- LDW -212(%r30),%r2 ;offset 0xacc
-$D3
- LDW -168(%r30),%r9 ;offset 0xad0
- LDD -176(%r30),%r8 ;offset 0xad4
- EXTRD,U %r8,31,32,%r7 ;offset 0xad8
- LDD -184(%r30),%r6 ;offset 0xadc
- EXTRD,U %r6,31,32,%r5 ;offset 0xae0
- LDW -188(%r30),%r4 ;offset 0xae4
- BVE (%r2) ;offset 0xae8
- .EXIT
- LDW,MB -192(%r30),%r3 ;offset 0xaec
- .PROCEND ;in=23,25;out=28,29;fpin=105,107;
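
bn_div_words returns the 64-bit quotient of the 128-bit value h:l divided by d; the assembly normalises the operands and leans on the $$div2U millicode, but the contract can be illustrated with a plain restoring division. This sketch assumes h < d so the quotient fits in one word (hypothetical name bn_div_words_ref, 64-bit BN_ULONG assumed):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

BN_ULONG bn_div_words_ref(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    BN_ULONG q = 0;
    int i;

    for (i = 0; i < 64; i++) {
        BN_ULONG top = h >> 63;              /* bit shifted out of h       */

        h = (h << 1) | (l >> 63);            /* shift h:l left by one      */
        l <<= 1;
        q <<= 1;
        if (top || h >= d) {                 /* subtract d when it fits    */
            h -= d;
            q |= 1;
        }
    }
    return q;
}
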
-
-
-
-
-;----------------------------------------------------------------------------
-;
-; Registers to hold 64-bit values to manipulate. The "L" part
-; of the register corresponds to the upper 32-bits, while the "R"
-; part corresponds to the lower 32-bits
-;
-; Note, that when using b6 and b7, the code must save these before
-; using them because they are callee save registers
-;
-;
-; Floating point registers to use to save values that
-; are manipulated. These don't collide with ftemp1-6 and
-; are all caller save registers
-;
-a0 .reg %fr22
-a0L .reg %fr22L
-a0R .reg %fr22R
-
-a1 .reg %fr23
-a1L .reg %fr23L
-a1R .reg %fr23R
-
-a2 .reg %fr24
-a2L .reg %fr24L
-a2R .reg %fr24R
-
-a3 .reg %fr25
-a3L .reg %fr25L
-a3R .reg %fr25R
-
-a4 .reg %fr26
-a4L .reg %fr26L
-a4R .reg %fr26R
-
-a5 .reg %fr27
-a5L .reg %fr27L
-a5R .reg %fr27R
-
-a6 .reg %fr28
-a6L .reg %fr28L
-a6R .reg %fr28R
-
-a7 .reg %fr29
-a7L .reg %fr29L
-a7R .reg %fr29R
-
-b0 .reg %fr30
-b0L .reg %fr30L
-b0R .reg %fr30R
-
-b1 .reg %fr31
-b1L .reg %fr31L
-b1R .reg %fr31R
-
-;
-; Temporary floating point variables, these are all caller save
-; registers
-;
-ftemp1 .reg %fr4
-ftemp2 .reg %fr5
-ftemp3 .reg %fr6
-ftemp4 .reg %fr7
-
-;
-; The B set of registers when used.
-;
-
-b2 .reg %fr8
-b2L .reg %fr8L
-b2R .reg %fr8R
-
-b3 .reg %fr9
-b3L .reg %fr9L
-b3R .reg %fr9R
-
-b4 .reg %fr10
-b4L .reg %fr10L
-b4R .reg %fr10R
-
-b5 .reg %fr11
-b5L .reg %fr11L
-b5R .reg %fr11R
-
-b6 .reg %fr12
-b6L .reg %fr12L
-b6R .reg %fr12R
-
-b7 .reg %fr13
-b7L .reg %fr13L
-b7R .reg %fr13R
-
-c1 .reg %r21 ; only reg
-temp1 .reg %r20 ; only reg
-temp2 .reg %r19 ; only reg
-temp3 .reg %r31 ; only reg
-
-m1 .reg %r28
-c2 .reg %r23
-high_one .reg %r1
-ht .reg %r6
-lt .reg %r5
-m .reg %r4
-c3 .reg %r3
-
-SQR_ADD_C .macro A0L,A0R,C1,C2,C3
- XMPYU A0L,A0R,ftemp1 ; m
- FSTD ftemp1,-24(%sp) ; store m
-
- XMPYU A0R,A0R,ftemp2 ; lt
- FSTD ftemp2,-16(%sp) ; store lt
-
- XMPYU A0L,A0L,ftemp3 ; ht
- FSTD ftemp3,-8(%sp) ; store ht
-
- LDD -24(%sp),m ; load m
- AND m,high_mask,temp2 ; m & Mask
- DEPD,Z m,30,31,temp3 ; m << 32+1
- LDD -16(%sp),lt ; lt
-
- LDD -8(%sp),ht ; ht
- EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
- ADD temp3,lt,lt ; lt = lt+m
- ADD,L ht,temp1,ht ; ht += temp1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C1,lt,C1 ; c1=c1+lt
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C2,ht,C2 ; c2=c2+ht
- ADD,DC C3,%r0,C3 ; c3++
-.endm
-
-SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
- XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
- FSTD ftemp1,-16(%sp) ;
- XMPYU A0R,A1L,ftemp2 ; m = bh*lt
- FSTD ftemp2,-8(%sp) ;
- XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
- FSTD ftemp3,-32(%sp)
- XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
- FSTD ftemp4,-24(%sp) ;
-
- LDD -8(%sp),m ; r21 = m
- LDD -16(%sp),m1 ; r19 = m1
- ADD,L m,m1,m ; m+m1
-
- DEPD,Z m,31,32,temp3 ; (m+m1<<32)
- LDD -24(%sp),ht ; r24 = ht
-
- CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
- ADD,L ht,high_one,ht ; ht+=high_one
-
- EXTRD,U m,31,32,temp1 ; m >> 32
- LDD -32(%sp),lt ; lt
- ADD,L ht,temp1,ht ; ht+= m>>32
- ADD lt,temp3,lt ; lt = lt+m1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD ht,ht,ht ; ht=ht+ht;
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-
- ADD lt,lt,lt ; lt=lt+lt;
- ADD,DC ht,%r0,ht ; add in carry (ht++)
-
- ADD C1,lt,C1 ; c1=c1+lt
- ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
- LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
-
- ADD C2,ht,C2 ; c2 = c2 + ht
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-.endm
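
SQR_ADD_C folds a[i]*a[i] into the three-word accumulator (C1,C2,C3); SQR_ADD_C2 folds 2*a[i]*a[j], doubling the product and letting the extra top bit spill into C3. A C sketch of the same accumulators, not taken from OpenSSL itself (hypothetical names sqr_add_c / sqr_add_c2, 64-bit BN_ULONG assumed, unsigned __int128 used for the 64x64 multiply instead of the 32x32 split):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

static void sqr_add_c(const BN_ULONG *a, int i,
                      BN_ULONG *c1, BN_ULONG *c2, BN_ULONG *c3)
{
    unsigned __int128 p = (unsigned __int128)a[i] * a[i];
    BN_ULONG lo = (BN_ULONG)p, hi = (BN_ULONG)(p >> 64);

    *c1 += lo; if (*c1 < lo) hi++;      /* hi <= 2^64-2, cannot wrap      */
    *c2 += hi; if (*c2 < hi) (*c3)++;
}

static void sqr_add_c2(const BN_ULONG *a, int i, int j,
                       BN_ULONG *c1, BN_ULONG *c2, BN_ULONG *c3)
{
    unsigned __int128 p = (unsigned __int128)a[i] * a[j];
    BN_ULONG lo = (BN_ULONG)p, hi = (BN_ULONG)(p >> 64);

    *c3 += hi >> 63;                    /* bit 128 of the doubled product */
    hi = (hi << 1) | (lo >> 63);
    lo <<= 1;
    *c1 += lo;
    if (*c1 < lo && ++hi == 0)          /* carry out of lo, maybe out of hi */
        (*c3)++;
    *c2 += hi; if (*c2 < hi) (*c3)++;
}
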
-
-;
-;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba8
- .PROC
- .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .ENTRY
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- SQR_ADD_C a0L,a0R,c1,c2,c3
- STD c1,0(r_ptr) ; r[0] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
- STD c2,8(r_ptr) ; r[1] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
- STD c3,16(r_ptr) ; r[2] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
- SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
- STD c1,24(r_ptr) ; r[3] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
- SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
- STD c2,32(r_ptr) ; r[4] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
- SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
- STD c3,40(r_ptr) ; r[5] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a3L,a3R,c1,c2,c3
- SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
- SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
- SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
- STD c1,48(r_ptr) ; r[6] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
- SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
- SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
- STD c2,56(r_ptr) ; r[7] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a4L,a4R,c3,c1,c2
- SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
- SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
- SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
- STD c3,64(r_ptr) ; r[8] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
- SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
- SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
- STD c1,72(r_ptr) ; r[9] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a5L,a5R,c2,c3,c1
- SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
- SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
- STD c2,80(r_ptr) ; r[10] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
- SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
- STD c3,88(r_ptr) ; r[11] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a6L,a6R,c1,c2,c3
- SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
- STD c1,96(r_ptr) ; r[12] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
- STD c2,104(r_ptr) ; r[13] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a7L,a7R,c3,c1,c2
- STD c3, 112(r_ptr) ; r[14] = c3
- STD c1, 120(r_ptr) ; r[15] = c1
-
- .EXIT
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-;-----------------------------------------------------------------------------
-;
-;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba4
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- SQR_ADD_C a0L,a0R,c1,c2,c3
-
- STD c1,0(r_ptr) ; r[0] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-
- STD c2,8(r_ptr) ; r[1] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-
- STD c3,16(r_ptr) ; r[2] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
- SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-
- STD c1,24(r_ptr) ; r[3] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-
- STD c2,32(r_ptr) ; r[4] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
- STD c3,40(r_ptr) ; r[5] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a3L,a3R,c1,c2,c3
- STD c1,48(r_ptr) ; r[6] = c1;
- STD c2,56(r_ptr) ; r[7] = c2;
-
- .EXIT
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-
-;---------------------------------------------------------------------------
-
-MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
- XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
- FSTD ftemp1,-16(%sp) ;
- XMPYU A0R,B0L,ftemp2 ; m = bh*lt
- FSTD ftemp2,-8(%sp) ;
- XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
- FSTD ftemp3,-32(%sp)
- XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
- FSTD ftemp4,-24(%sp) ;
-
- LDD -8(%sp),m ; r21 = m
- LDD -16(%sp),m1 ; r19 = m1
- ADD,L m,m1,m ; m+m1
-
- DEPD,Z m,31,32,temp3 ; (m+m1<<32)
- LDD -24(%sp),ht ; r24 = ht
-
- CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
- ADD,L ht,high_one,ht ; ht+=high_one
-
- EXTRD,U m,31,32,temp1 ; m >> 32
- LDD -32(%sp),lt ; lt
- ADD,L ht,temp1,ht ; ht+= m>>32
- ADD lt,temp3,lt ; lt = lt+m1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C1,lt,C1 ; c1=c1+lt
- ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise
-
- ADD C2,ht,C2 ; c2 = c2 + ht
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-.endm
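
MUL_ADD_C is the multiply-only variant: fold a[i]*b[j] into (C1,C2,C3) without doubling. Sketched under the same assumptions as the previous block (hypothetical name mul_add_c, 64-bit BN_ULONG, unsigned __int128 extension):

#include <stdint.h>

typedef uint64_t BN_ULONG;              /* assumed 64-bit */

static void mul_add_c(BN_ULONG ai, BN_ULONG bj,
                      BN_ULONG *c1, BN_ULONG *c2, BN_ULONG *c3)
{
    unsigned __int128 p = (unsigned __int128)ai * bj;
    BN_ULONG lo = (BN_ULONG)p, hi = (BN_ULONG)(p >> 64);

    *c1 += lo; if (*c1 < lo) hi++;      /* hi <= 2^64-2, cannot wrap */
    *c2 += hi; if (*c2 < hi) (*c3)++;
}
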
-
-
-;
-;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba8
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-	FSTD	%fr12,32(%sp)	; save fr12
-	FSTD	%fr13,40(%sp)	; save fr13
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- FLDD 0(b_ptr),b0
- FLDD 8(b_ptr),b1
- FLDD 16(b_ptr),b2
- FLDD 24(b_ptr),b3
- FLDD 32(b_ptr),b4
- FLDD 40(b_ptr),b5
- FLDD 48(b_ptr),b6
- FLDD 56(b_ptr),b7
-
- MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
- STD c1,0(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
- STD c2,8(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
- STD c3,16(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
- STD c1,24(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
- STD c2,32(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
- MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
- MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
- STD c3,40(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
- STD c1,48(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
- MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
- STD c2,56(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
- STD c3,64(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
- MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
- MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
- STD c1,72(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
- MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
- MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
- STD c2,80(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
- MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
- STD c3,88(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
- STD c1,96(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
- MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
- STD c2,104(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
- STD c3,112(r_ptr)
- STD c1,120(r_ptr)
-
- .EXIT
- FLDD -88(%sp),%fr13
- FLDD -96(%sp),%fr12
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
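
The long run of MUL_ADD_C invocations above is a fully unrolled comba multiplication: output word k collects every product a[i]*b[j] with i + j == k, and the (c1,c2,c3) chain is rotated after each column. Continuing the mul_add_c sketch above, the same structure as a loop (hypothetical name bn_mul_comba_ref; n would be 8 or 4 here):

void bn_mul_comba_ref(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
{
    BN_ULONG c1 = 0, c2 = 0, c3 = 0;
    int k, i;

    for (k = 0; k < 2 * n - 1; k++) {
        for (i = 0; i < n; i++) {
            int j = k - i;

            if (j >= 0 && j < n)
                mul_add_c(a[i], b[j], &c1, &c2, &c3);  /* column k product */
        }
        r[k] = c1;                      /* column k is complete       */
        c1 = c2; c2 = c3; c3 = 0;       /* rotate the carry chain     */
    }
    r[2 * n - 1] = c1;                  /* top word                   */
}
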
-
-;-----------------------------------------------------------------------------
-;
-;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba4
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-	FSTD	%fr12,32(%sp)	; save fr12
-	FSTD	%fr13,40(%sp)	; save fr13
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
-
- FLDD 0(b_ptr),b0
- FLDD 8(b_ptr),b1
- FLDD 16(b_ptr),b2
- FLDD 24(b_ptr),b3
-
- MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
- STD c1,0(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
- STD c2,8(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
- STD c3,16(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
- STD c1,24(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
- STD c2,32(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
- STD c3,40(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
- STD c1,48(r_ptr)
- STD c2,56(r_ptr)
-
- .EXIT
- FLDD -88(%sp),%fr13
- FLDD -96(%sp),%fr12
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-
-;--- not PIC .SPACE $TEXT$
-;--- not PIC .SUBSPA $CODE$
-;--- not PIC .SPACE $PRIVATE$,SORT=16
-;--- not PIC .IMPORT $global$,DATA
-;--- not PIC .SPACE $TEXT$
-;--- not PIC .SUBSPA $CODE$
-;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c
-;--- not PIC C$7
-;--- not PIC .ALIGN 8
-;--- not PIC .STRINGZ "Division would overflow (%d)\n"
- .END
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
deleted file mode 100644
index 97381172e7..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
+++ /dev/null
@@ -1,1612 +0,0 @@
-; Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
-;
-; Licensed under the OpenSSL license (the "License"). You may not use
-; this file except in compliance with the License. You can obtain a copy
-; in the file LICENSE in the source distribution or at
-; https://www.openssl.org/source/license.html
-
-;
-; PA-RISC 64-bit implementation of bn_asm code
-;
-; This code is approximately 2x faster than the C version
-; for RSA/DSA.
-;
-; See http://devresource.hp.com/ for more details on the PA-RISC
-; architecture. Also see the book "PA-RISC 2.0 Architecture"
-; by Gerry Kane for information on the instruction set architecture.
-;
-; Code written by Chris Ruemmler (with some help from the HP C
-; compiler).
-;
-; The code compiles with HP's assembler
-;
-
- .level 2.0W
- .space $TEXT$
- .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
-
-;
-; Global Register definitions used for the routines.
-;
-; Some information about HP's runtime architecture for 64-bits.
-;
-; "Caller save" means the calling function must save the register
-; if it wants the register to be preserved.
-; "Callee save" means if a function uses the register, it must save
-; the value before using it.
-;
-; For the floating point registers
-;
-; "caller save" registers: fr4-fr11, fr22-fr31
-; "callee save" registers: fr12-fr21
-; "special" registers: fr0-fr3 (status and exception registers)
-;
-; For the integer registers
-; value zero : r0
-; "caller save" registers: r1,r19-r26
-; "callee save" registers: r3-r18
-; return register : r2 (rp)
-; return values ; r28 (ret0,ret1)
-; Stack pointer ; r30 (sp)
-; global data pointer ; r27 (dp)
-; argument pointer ; r29 (ap)
-; millicode return ptr ; r31 (also a caller save register)
-
-
-;
-; Arguments to the routines
-;
-r_ptr .reg %r26
-a_ptr .reg %r25
-b_ptr .reg %r24
-num .reg %r24
-w .reg %r23
-n .reg %r23
-
-
-;
-; Globals used in some routines
-;
-
-top_overflow .reg %r29
-high_mask .reg %r22 ; value 0xffffffff80000000L
-
-
-;------------------------------------------------------------------------------
-;
-; bn_mul_add_words
-;
-;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr,
-; int num, BN_ULONG w)
-;
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = num
-; arg3 = w
-;
-; Local register definitions
-;
-
-fm1 .reg %fr22
-fm .reg %fr23
-ht_temp .reg %fr24
-ht_temp_1 .reg %fr25
-lt_temp .reg %fr26
-lt_temp_1 .reg %fr27
-fm1_1 .reg %fr28
-fm_1 .reg %fr29
-
-fw_h .reg %fr7L
-fw_l .reg %fr7R
-fw .reg %fr7
-
-fht_0 .reg %fr8L
-flt_0 .reg %fr8R
-t_float_0 .reg %fr8
-
-fht_1 .reg %fr9L
-flt_1 .reg %fr9R
-t_float_1 .reg %fr9
-
-tmp_0 .reg %r31
-tmp_1 .reg %r21
-m_0 .reg %r20
-m_1 .reg %r19
-ht_0 .reg %r1
-ht_1 .reg %r3
-lt_0 .reg %r4
-lt_1 .reg %r5
-m1_0 .reg %r6
-m1_1 .reg %r7
-rp_val .reg %r8
-rp_val_1 .reg %r9
-
-bn_mul_add_words
- .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
- .proc
- .callinfo frame=128
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- NOP ; Needed to make the loop 16-byte aligned
- NOP ; Needed to make the loop 16-byte aligned
-
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
- STD %r7,32(%sp) ; save r7
- STD %r8,40(%sp) ; save r8
-
- STD %r9,48(%sp) ; save r9
- COPY %r0,%ret0 ; return 0 by default
- DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
- STD w,56(%sp) ; store w on stack
-
- CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
- ; The loop is unrolled twice, so if there is only 1 number
- ; then go straight to the cleanup code.
- ;
- CMPIB,= 1,num,bn_mul_add_words_single_top
- FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
- ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-	; two 32-bit multiplies can be issued per cycle.
- ;
-bn_mul_add_words_unroll2
-
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- LDD 0(r_ptr),rp_val ; rp[0]
- LDD 8(r_ptr),rp_val_1 ; rp[1]
-
- XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
- XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1[0]
- FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1]
-
- XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h
- XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m[0]
- FSTD fm_1,-40(%sp) ; -40(sp) = m[1]
-
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
- XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp
- FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1
-
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp
- FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1
-
- LDD -8(%sp),m_0 ; m[0]
- LDD -40(%sp),m_1 ; m[1]
- LDD -16(%sp),m1_0 ; m1[0]
- LDD -48(%sp),m1_1 ; m1[1]
-
- LDD -24(%sp),ht_0 ; ht[0]
- LDD -56(%sp),ht_1 ; ht[1]
- ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0];
- ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1];
-
- LDD -32(%sp),lt_0
- LDD -64(%sp),lt_1
- CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0])
- ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32)
-
- CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1])
- ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32)
- EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32
-
- EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32
- DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32
- ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32)
- ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32)
-
- ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0];
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1];
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
-
- ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c;
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0]
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
-
- LDO -2(num),num ; num = num - 2;
- ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c);
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
- STD lt_0,0(r_ptr) ; rp[0] = lt[0]
-
- ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1]
- ADD,DC ht_1,%r0,%ret0 ; ht[1]++
- LDO 16(a_ptr),a_ptr ; a_ptr += 2
-
- STD lt_1,8(r_ptr) ; rp[1] = lt[1]
- CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
- LDO 16(r_ptr),r_ptr ; r_ptr += 2
-
- CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_mul_add_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- LDD 0(r_ptr),rp_val ; rp[0]
- LDO 8(a_ptr),a_ptr ; a_ptr++
- XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
-
- LDD -8(%sp),m_0
- LDD -16(%sp),m1_0 ; m1 = temp1
- ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
- LDD -24(%sp),ht_0
- LDD -32(%sp),lt_0
-
- CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
- ADD %ret0,tmp_0,lt_0 ; lt = lt + c;
- ADD,DC ht_0,%r0,ht_0 ; ht++
- ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0]
- ADD,DC ht_0,%r0,%ret0 ; ht++
- STD lt_0,0(r_ptr) ; rp[0] = lt
-
-bn_mul_add_words_exit
- .EXIT
- LDD -80(%sp),%r9 ; restore r9
- LDD -88(%sp),%r8 ; restore r8
- LDD -96(%sp),%r7 ; restore r7
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3 ; restore r3
- .PROCEND ;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-; arg3 = w
-
-bn_mul_words
- .proc
- .callinfo frame=128
- .entry
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-
- STD %r7,32(%sp) ; save r7
- COPY %r0,%ret0 ; return 0 by default
- DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
- STD w,56(%sp) ; w on stack
-
- CMPIB,>= 0,num,bn_mul_words_exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
- ; See if only 1 word to do, thus just do cleanup
- ;
- CMPIB,= 1,num,bn_mul_words_single_top
- FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l)
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
- ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-	; two 32-bit multiplies can be issued per cycle.
- ;
-bn_mul_words_unroll2
-
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R)
- XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l
- XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l
-
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- FSTD fm1_1,-48(%sp) ; -48(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h
-
- FSTD fm,-8(%sp) ; -8(sp) = m
- FSTD fm_1,-40(%sp) ; -40(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h
- XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h
-
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l
-
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
- FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt
- LDD -8(%sp),m_0
- LDD -40(%sp),m_1
-
- LDD -16(%sp),m1_0
- LDD -48(%sp),m1_1
- LDD -24(%sp),ht_0
- LDD -56(%sp),ht_1
-
- ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1;
- ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1;
- LDD -32(%sp),lt_0
- LDD -64(%sp),lt_1
-
- CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
- CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1)
- ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
- EXTRD,U tmp_1,31,32,m_1 ; m>>32
- DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32)
- ADD lt_0,m1_0,lt_0 ; lt = lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD lt_1,m1_1,lt_1 ; lt = lt+m1;
- ADD,DC ht_1,%r0,ht_1 ; ht++
- ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0);
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0)
- ADD,DC ht_1,%r0,ht_1 ; ht++
- STD lt_0,0(r_ptr) ; rp[0] = lt
- STD lt_1,8(r_ptr) ; rp[1] = lt
-
- COPY ht_1,%ret0 ; carry = ht
- LDO -2(num),num ; num = num - 2;
- LDO 16(a_ptr),a_ptr ; ap += 2
- CMPIB,<= 2,num,bn_mul_words_unroll2
- LDO 16(r_ptr),r_ptr ; rp++
-
- CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_mul_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
- XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l
- FSTD fm1,-16(%sp) ; -16(sp) = m1
- XMPYU flt_0,fw_h,fm ; m = lt*fw_h
- FSTD fm,-8(%sp) ; -8(sp) = m
- XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h
- FSTD ht_temp,-24(%sp) ; -24(sp) = ht
- XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l
- FSTD lt_temp,-32(%sp) ; -32(sp) = lt
-
- LDD -8(%sp),m_0
- LDD -16(%sp),m1_0
- ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1;
- LDD -24(%sp),ht_0
- LDD -32(%sp),lt_0
-
- CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1)
- ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32)
-
- EXTRD,U tmp_0,31,32,m_0 ; m>>32
- DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32
-
- ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32)
- ADD lt_0,m1_0,lt_0 ; lt= lt+m1;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- ADD %ret0,lt_0,lt_0 ; lt = lt + c;
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- COPY ht_0,%ret0 ; copy carry
- STD lt_0,0(r_ptr) ; rp[0] = lt
-
-bn_mul_words_exit
- .EXIT
- LDD -96(%sp),%r7 ; restore r7
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3 ; restore r3
- .PROCEND ;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-;
-
-bn_sqr_words
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- NOP
- STD %r5,16(%sp) ; save r5
-
- CMPIB,>= 0,num,bn_sqr_words_exit
- LDO 128(%sp),%sp ; bump stack
-
- ;
-	; If only 1, then go straight to cleanup
- ;
- CMPIB,= 1,num,bn_sqr_words_single_top
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-
-bn_sqr_words_unroll2
- FLDD 0(a_ptr),t_float_0 ; a[0]
- FLDD 8(a_ptr),t_float_1 ; a[1]
- XMPYU fht_0,flt_0,fm ; m[0]
- XMPYU fht_1,flt_1,fm_1 ; m[1]
-
- FSTD fm,-24(%sp) ; store m[0]
- FSTD fm_1,-56(%sp) ; store m[1]
- XMPYU flt_0,flt_0,lt_temp ; lt[0]
- XMPYU flt_1,flt_1,lt_temp_1 ; lt[1]
-
- FSTD lt_temp,-16(%sp) ; store lt[0]
- FSTD lt_temp_1,-48(%sp) ; store lt[1]
- XMPYU fht_0,fht_0,ht_temp ; ht[0]
- XMPYU fht_1,fht_1,ht_temp_1 ; ht[1]
-
- FSTD ht_temp,-8(%sp) ; store ht[0]
- FSTD ht_temp_1,-40(%sp) ; store ht[1]
- LDD -24(%sp),m_0
- LDD -56(%sp),m_1
-
- AND m_0,high_mask,tmp_0 ; m[0] & Mask
- AND m_1,high_mask,tmp_1 ; m[1] & Mask
- DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1
- DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1
-
- LDD -16(%sp),lt_0
- LDD -48(%sp),lt_1
- EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1
- EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1
-
- LDD -8(%sp),ht_0
- LDD -40(%sp),ht_1
- ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0
- ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1
-
- ADD lt_0,m_0,lt_0 ; lt = lt+m
- ADD,DC ht_0,%r0,ht_0 ; ht[0]++
- STD lt_0,0(r_ptr) ; rp[0] = lt[0]
- STD ht_0,8(r_ptr) ; rp[1] = ht[1]
-
- ADD lt_1,m_1,lt_1 ; lt = lt+m
- ADD,DC ht_1,%r0,ht_1 ; ht[1]++
- STD lt_1,16(r_ptr) ; rp[2] = lt[1]
- STD ht_1,24(r_ptr) ; rp[3] = ht[1]
-
- LDO -2(num),num ; num = num - 2;
- LDO 16(a_ptr),a_ptr ; ap += 2
- CMPIB,<= 2,num,bn_sqr_words_unroll2
- LDO 32(r_ptr),r_ptr ; rp += 4
-
- CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
-
- ;
- ; Top of loop aligned on 64-byte boundary
- ;
-bn_sqr_words_single_top
- FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
- XMPYU fht_0,flt_0,fm ; m
- FSTD fm,-24(%sp) ; store m
-
- XMPYU flt_0,flt_0,lt_temp ; lt
- FSTD lt_temp,-16(%sp) ; store lt
-
- XMPYU fht_0,fht_0,ht_temp ; ht
- FSTD ht_temp,-8(%sp) ; store ht
-
- LDD -24(%sp),m_0 ; load m
- AND m_0,high_mask,tmp_0 ; m & Mask
- DEPD,Z m_0,30,31,m_0 ; m << 32+1
- LDD -16(%sp),lt_0 ; lt
-
- LDD -8(%sp),ht_0 ; ht
- EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1
- ADD m_0,lt_0,lt_0 ; lt = lt+m
- ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0
- ADD,DC ht_0,%r0,ht_0 ; ht++
-
- STD lt_0,0(r_ptr) ; rp[0] = lt
- STD ht_0,8(r_ptr) ; rp[1] = ht
-
-bn_sqr_words_exit
- .EXIT
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
- .PROCEND ;in=23,24,25,26,29;out=28;
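
For reference, the half-word squaring trick that bn_sqr_words implements above can be modeled in C roughly as follows (a sketch only: it assumes a 64-bit BN_ULONG, and sqr_word is an illustrative name, not an OpenSSL function):

    #include <stdint.h>

    /* a = (h << 32) | l, so a^2 = ht*2^64 + 2*m*2^32 + lt with
     * ht = h*h, m = h*l, lt = l*l -- three 32x32->64 multiplies,
     * mirroring the XMPYU/DEPD/EXTRD sequence in the code above. */
    static void sqr_word(uint64_t a, uint64_t *lo, uint64_t *hi)
    {
        uint64_t h  = a >> 32, l = a & 0xffffffffULL;
        uint64_t m  = h * l;
        uint64_t lt = l * l;
        uint64_t ht = h * h;
        uint64_t m_lo = m << 33;          /* 2*m*2^32, low 64 bits  */
        uint64_t m_hi = m >> 31;          /* 2*m*2^32, high 64 bits */

        lt += m_lo;
        ht += m_hi + (lt < m_lo);         /* propagate the carry    */
        *lo = lt;                         /* rp[0]                  */
        *hi = ht;                         /* rp[1]                  */
    }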
-
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = bp
-; arg3 = n
-
-t .reg %r22
-b .reg %r21
-l .reg %r20
-
-bn_add_words
- .proc
- .entry
- .callinfo
- .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .align 64
-
- CMPIB,>= 0,n,bn_add_words_exit
- COPY %r0,%ret0 ; return 0 by default
-
- ;
- ; If 2 or more numbers do the loop
- ;
- CMPIB,= 1,n,bn_add_words_single_top
- NOP
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-bn_add_words_unroll2
- LDD 0(a_ptr),t
- LDD 0(b_ptr),b
- ADD t,%ret0,t ; t = t+c;
- ADD,DC %r0,%r0,%ret0 ; set c to carry
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret0,%r0,%ret0 ; c+= carry
- STD l,0(r_ptr)
-
- LDD 8(a_ptr),t
- LDD 8(b_ptr),b
- ADD t,%ret0,t ; t = t+c;
- ADD,DC %r0,%r0,%ret0 ; set c to carry
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret0,%r0,%ret0 ; c+= carry
- STD l,8(r_ptr)
-
- LDO -2(n),n
- LDO 16(a_ptr),a_ptr
- LDO 16(b_ptr),b_ptr
-
- CMPIB,<= 2,n,bn_add_words_unroll2
- LDO 16(r_ptr),r_ptr
-
- CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
-
-bn_add_words_single_top
- LDD 0(a_ptr),t
- LDD 0(b_ptr),b
-
- ADD t,%ret0,t ; t = t+c;
- ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??)
- ADD t,b,l ; l = t + b[0]
- ADD,DC %ret0,%r0,%ret0 ; c+= carry
- STD l,0(r_ptr)
-
-bn_add_words_exit
- .EXIT
- BVE (%rp)
- NOP
- .PROCEND ;in=23,24,25,26,29;out=28;
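
The unrolled loop above is a plain add-with-carry chain. A portable C model of the same contract (comparable to OpenSSL's generic fallback in bn_asm.c; the name add_words_ref and the 64-bit BN_ULONG are assumptions for illustration) is:

    #include <stdint.h>

    typedef uint64_t BN_ULONG;            /* assumed 64-bit limb    */

    /* r[] = a[] + b[], returning the final carry word (0 or 1).    */
    static BN_ULONG add_words_ref(BN_ULONG *r, const BN_ULONG *a,
                                  const BN_ULONG *b, int n)
    {
        BN_ULONG c = 0;
        for (int i = 0; i < n; i++) {
            BN_ULONG t = a[i] + c;
            c = (t < c);                  /* carry out of a[i] + c  */
            r[i] = t + b[i];
            c += (r[i] < t);              /* carry out of t + b[i]  */
        }
        return c;
    }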
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = bp
-; arg3 = n
-
-t1 .reg %r22
-t2 .reg %r21
-sub_tmp1 .reg %r20
-sub_tmp2 .reg %r19
-
-
-bn_sub_words
- .proc
- .callinfo
- .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- CMPIB,>= 0,n,bn_sub_words_exit
- COPY %r0,%ret0 ; return 0 by default
-
- ;
- ; If 2 or more numbers do the loop
- ;
- CMPIB,= 1,n,bn_sub_words_single_top
- NOP
-
- ;
- ; This loop is unrolled 2 times (64-byte aligned as well)
- ;
-bn_sub_words_unroll2
- LDD 0(a_ptr),t1
- LDD 0(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
-
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret0
- STD sub_tmp1,0(r_ptr)
-
- LDD 8(a_ptr),t1
- LDD 8(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret0
- STD sub_tmp1,8(r_ptr)
-
- LDO -2(n),n
- LDO 16(a_ptr),a_ptr
- LDO 16(b_ptr),b_ptr
-
- CMPIB,<= 2,n,bn_sub_words_unroll2
- LDO 16(r_ptr),r_ptr
-
- CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
-
-bn_sub_words_single_top
- LDD 0(a_ptr),t1
- LDD 0(b_ptr),t2
- SUB t1,t2,sub_tmp1 ; t3 = t1-t2;
- SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c;
- CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2
- LDO 1(%r0),sub_tmp2
-
- CMPCLR,*= t1,t2,%r0
- COPY sub_tmp2,%ret0
-
- STD sub_tmp1,0(r_ptr)
-
-bn_sub_words_exit
- .EXIT
- BVE (%rp)
- NOP
- .PROCEND ;in=23,24,25,26,29;out=28;
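
The CMPCLR pairs above compute the next borrow without branching; per word the logic amounts to the following (a sketch; sub_word_ref is an illustrative name):

    #include <stdint.h>

    /* One bn_sub_words step: *r = a - b - borrow_in, and the new
     * borrow is set when a < b, or when a == b and we borrowed in. */
    static uint64_t sub_word_ref(uint64_t a, uint64_t b,
                                 uint64_t borrow_in, uint64_t *r)
    {
        *r = a - b - borrow_in;
        if (a == b)
            return borrow_in;             /* keep the incoming borrow  */
        return (uint64_t)(a < b);         /* otherwise compare decides */
    }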
-
-;------------------------------------------------------------------------------
-;
-; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
-;
-; arg0 = h
-; arg1 = l
-; arg2 = d
-;
-; This is mainly just modified assembly from the compiler, thus the
-; lack of variable names.
-;
-;------------------------------------------------------------------------------
-bn_div_words
- .proc
- .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .IMPORT BN_num_bits_word,CODE,NO_RELOCATION
- .IMPORT __iob,DATA
- .IMPORT fprintf,CODE,NO_RELOCATION
- .IMPORT abort,CODE,NO_RELOCATION
- .IMPORT $$div2U,MILLICODE
- .entry
- STD %r2,-16(%r30)
- STD,MA %r3,352(%r30)
- STD %r4,-344(%r30)
- STD %r5,-336(%r30)
- STD %r6,-328(%r30)
- STD %r7,-320(%r30)
- STD %r8,-312(%r30)
- STD %r9,-304(%r30)
- STD %r10,-296(%r30)
-
- STD %r27,-288(%r30) ; save gp
-
- COPY %r24,%r3 ; save d
- COPY %r26,%r4 ; save h (high 64-bits)
- LDO -1(%r0),%ret0 ; return -1 by default
-
- CMPB,*= %r0,%arg2,$D3 ; if (d == 0)
- COPY %r25,%r5 ; save l (low 64-bits)
-
- LDO -48(%r30),%r29 ; create ap
- .CALL ;in=26,29;out=28;
- B,L BN_num_bits_word,%r2
- COPY %r3,%r26
- LDD -288(%r30),%r27 ; restore gp
- LDI 64,%r21
-
- CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward)
- COPY %ret0,%r24 ; i
- MTSARCM %r24
- DEPDI,Z -1,%sar,1,%r29
- CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward)
-
-$00000012
- SUBI 64,%r24,%r31 ; i = 64 - i;
- CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d)
- SUB %r4,%r3,%r4 ; h -= d
- CMPB,= %r31,%r0,$0000001A ; if (i)
- COPY %r0,%r10 ; ret = 0
- MTSARCM %r31 ; i to shift
- DEPD,Z %r3,%sar,64,%r3 ; d <<= i;
-	SUBI	64,%r31,%r19	; 64 - i; redundant
- MTSAR %r19 ; (64 -i) to shift
- SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i)
- MTSARCM %r31 ; i to shift
- DEPD,Z %r5,%sar,64,%r5 ; l <<= i;
-
-$0000001A
- DEPDI,Z -1,31,32,%r19
-	EXTRD,U	%r3,31,32,%r6	; dh=(d&0xffffffff00000000)>>32
-	EXTRD,U	%r3,63,32,%r8	; dl = d&0xffffffff
- LDO 2(%r0),%r9
- STD %r3,-280(%r30) ; "d" to stack
-
-$0000001C
- DEPDI,Z -1,63,32,%r29 ;
- EXTRD,U %r4,31,32,%r31 ; h >> 32
- CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div
- COPY %r4,%r26
- EXTRD,U %r4,31,32,%r25
- COPY %r6,%r24
- .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
- B,L $$div2U,%r2
- EXTRD,U %r6,31,32,%r23
- DEPD %r28,31,32,%r29
-$D2
- STD %r29,-272(%r30) ; q
- AND %r5,%r19,%r24 ; t & 0xffffffff00000000;
- EXTRD,U %r24,31,32,%r24 ; ???
- FLDD -272(%r30),%fr7 ; q
- FLDD -280(%r30),%fr8 ; d
- XMPYU %fr8L,%fr7L,%fr10
- FSTD %fr10,-256(%r30)
- XMPYU %fr8L,%fr7R,%fr22
- FSTD %fr22,-264(%r30)
- XMPYU %fr8R,%fr7L,%fr11
- XMPYU %fr8R,%fr7R,%fr23
- FSTD %fr11,-232(%r30)
- FSTD %fr23,-240(%r30)
- LDD -256(%r30),%r28
- DEPD,Z %r28,31,32,%r2
- LDD -264(%r30),%r20
- ADD,L %r20,%r2,%r31
- LDD -232(%r30),%r22
- DEPD,Z %r22,31,32,%r22
- LDD -240(%r30),%r21
- B $00000024 ; enter loop
- ADD,L %r21,%r22,%r23
-
-$0000002A
- LDO -1(%r29),%r29
- SUB %r23,%r8,%r23
-$00000024
- SUB %r4,%r31,%r25
- AND %r25,%r19,%r26
- CMPB,*<>,N %r0,%r26,$00000046 ; (forward)
- DEPD,Z %r25,31,32,%r20
- OR %r20,%r24,%r21
- CMPB,*<<,N %r21,%r23,$0000002A ;(backward)
- SUB %r31,%r6,%r31
-;-------------Break path---------------------
-
-$00000046
- DEPD,Z %r23,31,32,%r25 ;tl
- EXTRD,U %r23,31,32,%r26 ;t
-	AND	%r25,%r19,%r24	;tl = (tl<<32)&0xffffffff00000000L
- ADD,L %r31,%r26,%r31 ;th += t;
- CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl)
- LDO 1(%r31),%r31 ; th++;
- CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward)
- LDO -1(%r29),%r29 ;q--;
- ADD,L %r4,%r3,%r4 ;h += d;
-$00000036
- ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward)
- SUB %r5,%r24,%r28 ; l -= tl;
- SUB %r4,%r31,%r24 ; h -= th;
- SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32));
- DEPD,Z %r29,31,32,%r10 ; ret = q<<32
- b $0000001C
- DEPD,Z %r28,31,32,%r5 ; l = l << 32
-
-$D1
- OR %r10,%r29,%r28 ; ret |= q
-$D3
- LDD -368(%r30),%r2
-$D0
- LDD -296(%r30),%r10
- LDD -304(%r30),%r9
- LDD -312(%r30),%r8
- LDD -320(%r30),%r7
- LDD -328(%r30),%r6
- LDD -336(%r30),%r5
- LDD -344(%r30),%r4
- BVE (%r2)
- .EXIT
- LDD,MB -352(%r30),%r3
-
-bn_div_err_case
- MFIA %r6
- ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1
- LDO R'bn_div_words-bn_div_err_case(%r1),%r6
- ADDIL LT'__iob,%r27,%r1
- LDD RT'__iob(%r1),%r26
- ADDIL L'C$4-bn_div_words,%r6,%r1
- LDO R'C$4-bn_div_words(%r1),%r25
- LDO 64(%r26),%r26
- .CALL ;in=24,25,26,29;out=28;
- B,L fprintf,%r2
- LDO -48(%r30),%r29
- LDD -288(%r30),%r27
- .CALL ;in=29;
- B,L abort,%r2
- LDO -48(%r30),%r29
- LDD -288(%r30),%r27
- B $D0
- LDD -368(%r30),%r2
- .PROCEND ;in=24,25,26,29;out=28;
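
As a behavioural reference only (not the 32-bit-digit algorithm used above), the quotient bn_div_words is expected to return can be written directly where the compiler offers a 128-bit integer type; this sketch assumes d != 0 and h < d so the result fits in 64 bits:

    #include <stdint.h>

    /* floor(((h << 64) | l) / d) for 64-bit words. */
    static uint64_t div_words_ref(uint64_t h, uint64_t l, uint64_t d)
    {
        unsigned __int128 n = ((unsigned __int128)h << 64) | l;
        return (uint64_t)(n / d);
    }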
-
-;----------------------------------------------------------------------------
-;
-; Registers to hold 64-bit values to manipulate. The "L" part
-; of the register corresponds to the upper 32-bits, while the "R"
-; part corresponds to the lower 32-bits
-;
-; Note that when using b6 and b7, the code must save these before
-; using them because they are callee-save registers
-;
-;
-; Floating point registers to use to save values that
-; are manipulated. These don't collide with ftemp1-6 and
-; are all caller save registers
-;
-a0 .reg %fr22
-a0L .reg %fr22L
-a0R .reg %fr22R
-
-a1 .reg %fr23
-a1L .reg %fr23L
-a1R .reg %fr23R
-
-a2 .reg %fr24
-a2L .reg %fr24L
-a2R .reg %fr24R
-
-a3 .reg %fr25
-a3L .reg %fr25L
-a3R .reg %fr25R
-
-a4 .reg %fr26
-a4L .reg %fr26L
-a4R .reg %fr26R
-
-a5 .reg %fr27
-a5L .reg %fr27L
-a5R .reg %fr27R
-
-a6 .reg %fr28
-a6L .reg %fr28L
-a6R .reg %fr28R
-
-a7 .reg %fr29
-a7L .reg %fr29L
-a7R .reg %fr29R
-
-b0 .reg %fr30
-b0L .reg %fr30L
-b0R .reg %fr30R
-
-b1 .reg %fr31
-b1L .reg %fr31L
-b1R .reg %fr31R
-
-;
-; Temporary floating point variables, these are all caller save
-; registers
-;
-ftemp1 .reg %fr4
-ftemp2 .reg %fr5
-ftemp3 .reg %fr6
-ftemp4 .reg %fr7
-
-;
-; The B set of registers when used.
-;
-
-b2 .reg %fr8
-b2L .reg %fr8L
-b2R .reg %fr8R
-
-b3 .reg %fr9
-b3L .reg %fr9L
-b3R .reg %fr9R
-
-b4 .reg %fr10
-b4L .reg %fr10L
-b4R .reg %fr10R
-
-b5 .reg %fr11
-b5L .reg %fr11L
-b5R .reg %fr11R
-
-b6 .reg %fr12
-b6L .reg %fr12L
-b6R .reg %fr12R
-
-b7 .reg %fr13
-b7L .reg %fr13L
-b7R .reg %fr13R
-
-c1 .reg %r21 ; only reg
-temp1 .reg %r20 ; only reg
-temp2 .reg %r19 ; only reg
-temp3 .reg %r31 ; only reg
-
-m1 .reg %r28
-c2 .reg %r23
-high_one .reg %r1
-ht .reg %r6
-lt .reg %r5
-m .reg %r4
-c3 .reg %r3
-
-SQR_ADD_C .macro A0L,A0R,C1,C2,C3
- XMPYU A0L,A0R,ftemp1 ; m
- FSTD ftemp1,-24(%sp) ; store m
-
- XMPYU A0R,A0R,ftemp2 ; lt
- FSTD ftemp2,-16(%sp) ; store lt
-
- XMPYU A0L,A0L,ftemp3 ; ht
- FSTD ftemp3,-8(%sp) ; store ht
-
- LDD -24(%sp),m ; load m
- AND m,high_mask,temp2 ; m & Mask
- DEPD,Z m,30,31,temp3 ; m << 32+1
- LDD -16(%sp),lt ; lt
-
- LDD -8(%sp),ht ; ht
- EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1
- ADD temp3,lt,lt ; lt = lt+m
- ADD,L ht,temp1,ht ; ht += temp1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C1,lt,C1 ; c1=c1+lt
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C2,ht,C2 ; c2=c2+ht
- ADD,DC C3,%r0,C3 ; c3++
-.endm
-
-SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3
- XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht
- FSTD ftemp1,-16(%sp) ;
- XMPYU A0R,A1L,ftemp2 ; m = bh*lt
- FSTD ftemp2,-8(%sp) ;
- XMPYU A0R,A1R,ftemp3 ; lt = bl*lt
- FSTD ftemp3,-32(%sp)
- XMPYU A0L,A1L,ftemp4 ; ht = bh*ht
- FSTD ftemp4,-24(%sp) ;
-
- LDD -8(%sp),m ; r21 = m
- LDD -16(%sp),m1 ; r19 = m1
- ADD,L m,m1,m ; m+m1
-
- DEPD,Z m,31,32,temp3 ; (m+m1<<32)
- LDD -24(%sp),ht ; r24 = ht
-
- CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
- ADD,L ht,high_one,ht ; ht+=high_one
-
- EXTRD,U m,31,32,temp1 ; m >> 32
- LDD -32(%sp),lt ; lt
- ADD,L ht,temp1,ht ; ht+= m>>32
- ADD lt,temp3,lt ; lt = lt+m1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD ht,ht,ht ; ht=ht+ht;
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-
- ADD lt,lt,lt ; lt=lt+lt;
- ADD,DC ht,%r0,ht ; add in carry (ht++)
-
- ADD C1,lt,C1 ; c1=c1+lt
- ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++)
- LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise
-
- ADD C2,ht,C2 ; c2 = c2 + ht
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-.endm
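
Both macros accumulate one comba column into the running carry triple (C1,C2,C3). With a 128-bit type the intent can be sketched as below (illustrative only; the real code builds the product from 32-bit halves as shown above):

    #include <stdint.h>

    /* SQR_ADD_C:  (c1,c2,c3) += a*a
     * SQR_ADD_C2: (c1,c2,c3) += 2*a*b (same idea, product doubled,
     *             with one more carry that can reach c3).          */
    static void sqr_add_c_ref(uint64_t a, uint64_t *c1,
                              uint64_t *c2, uint64_t *c3)
    {
        unsigned __int128 p = (unsigned __int128)a * a;
        uint64_t lo = (uint64_t)p, hi = (uint64_t)(p >> 64);

        *c1 += lo;  hi  += (*c1 < lo);    /* carry into the hi word */
        *c2 += hi;  *c3 += (*c2 < hi);    /* and on into c3         */
    }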
-
-;
-;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba8
- .PROC
- .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .ENTRY
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- SQR_ADD_C a0L,a0R,c1,c2,c3
- STD c1,0(r_ptr) ; r[0] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
- STD c2,8(r_ptr) ; r[1] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
- STD c3,16(r_ptr) ; r[2] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
- SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
- STD c1,24(r_ptr) ; r[3] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
- SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
- STD c2,32(r_ptr) ; r[4] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
- SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
- STD c3,40(r_ptr) ; r[5] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a3L,a3R,c1,c2,c3
- SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
- SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
- SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
- STD c1,48(r_ptr) ; r[6] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
- SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
- SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
- STD c2,56(r_ptr) ; r[7] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a4L,a4R,c3,c1,c2
- SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
- SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
- SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
- STD c3,64(r_ptr) ; r[8] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
- SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
- SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
- STD c1,72(r_ptr) ; r[9] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a5L,a5R,c2,c3,c1
- SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
- SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
- STD c2,80(r_ptr) ; r[10] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
- SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
- STD c3,88(r_ptr) ; r[11] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a6L,a6R,c1,c2,c3
- SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
- STD c1,96(r_ptr) ; r[12] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
- STD c2,104(r_ptr) ; r[13] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a7L,a7R,c3,c1,c2
- STD c3, 112(r_ptr) ; r[14] = c3
- STD c1, 120(r_ptr) ; r[15] = c1
-
- .EXIT
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-;-----------------------------------------------------------------------------
-;
-;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba4
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- SQR_ADD_C a0L,a0R,c1,c2,c3
-
- STD c1,0(r_ptr) ; r[0] = c1;
- COPY %r0,c1
-
- SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-
- STD c2,8(r_ptr) ; r[1] = c2;
- COPY %r0,c2
-
- SQR_ADD_C a1L,a1R,c3,c1,c2
- SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-
- STD c3,16(r_ptr) ; r[2] = c3;
- COPY %r0,c3
-
- SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
- SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-
- STD c1,24(r_ptr) ; r[3] = c1;
- COPY %r0,c1
-
- SQR_ADD_C a2L,a2R,c2,c3,c1
- SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-
- STD c2,32(r_ptr) ; r[4] = c2;
- COPY %r0,c2
-
- SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
- STD c3,40(r_ptr) ; r[5] = c3;
- COPY %r0,c3
-
- SQR_ADD_C a3L,a3R,c1,c2,c3
- STD c1,48(r_ptr) ; r[6] = c1;
- STD c2,56(r_ptr) ; r[7] = c2;
-
- .EXIT
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-
-;---------------------------------------------------------------------------
-
-MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3
- XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht
- FSTD ftemp1,-16(%sp) ;
- XMPYU A0R,B0L,ftemp2 ; m = bh*lt
- FSTD ftemp2,-8(%sp) ;
- XMPYU A0R,B0R,ftemp3 ; lt = bl*lt
- FSTD ftemp3,-32(%sp)
- XMPYU A0L,B0L,ftemp4 ; ht = bh*ht
- FSTD ftemp4,-24(%sp) ;
-
- LDD -8(%sp),m ; r21 = m
- LDD -16(%sp),m1 ; r19 = m1
- ADD,L m,m1,m ; m+m1
-
- DEPD,Z m,31,32,temp3 ; (m+m1<<32)
- LDD -24(%sp),ht ; r24 = ht
-
- CMPCLR,*>>= m,m1,%r0 ; if (m < m1)
- ADD,L ht,high_one,ht ; ht+=high_one
-
- EXTRD,U m,31,32,temp1 ; m >> 32
- LDD -32(%sp),lt ; lt
- ADD,L ht,temp1,ht ; ht+= m>>32
- ADD lt,temp3,lt ; lt = lt+m1
- ADD,DC ht,%r0,ht ; ht++
-
- ADD C1,lt,C1 ; c1=c1+lt
-	ADD,DC	ht,%r0,ht	; ht++
-
- ADD C2,ht,C2 ; c2 = c2 + ht
- ADD,DC C3,%r0,C3 ; add in carry (c3++)
-.endm
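
MUL_ADD_C is the multiply-and-accumulate counterpart used by the comba multipliers below; the corresponding column update, again only as a 128-bit sketch with an illustrative name:

    #include <stdint.h>

    /* (c1,c2,c3) += a*b, with carries rippling upward. */
    static void mul_add_c_ref(uint64_t a, uint64_t b, uint64_t *c1,
                              uint64_t *c2, uint64_t *c3)
    {
        unsigned __int128 p = (unsigned __int128)a * b;
        uint64_t lo = (uint64_t)p, hi = (uint64_t)(p >> 64);

        *c1 += lo;  hi  += (*c1 < lo);
        *c2 += hi;  *c3 += (*c2 < hi);
    }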
-
-
-;
-;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba8
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-	FSTD	%fr12,32(%sp)	; save fr12
-	FSTD	%fr13,40(%sp)	; save fr13
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
- FLDD 32(a_ptr),a4
- FLDD 40(a_ptr),a5
- FLDD 48(a_ptr),a6
- FLDD 56(a_ptr),a7
-
- FLDD 0(b_ptr),b0
- FLDD 8(b_ptr),b1
- FLDD 16(b_ptr),b2
- FLDD 24(b_ptr),b3
- FLDD 32(b_ptr),b4
- FLDD 40(b_ptr),b5
- FLDD 48(b_ptr),b6
- FLDD 56(b_ptr),b7
-
- MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
- STD c1,0(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
- STD c2,8(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
- STD c3,16(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
- STD c1,24(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
- STD c2,32(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
- MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
- MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
- STD c3,40(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
- STD c1,48(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
- MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
- STD c2,56(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
- STD c3,64(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
- MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
- MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
- STD c1,72(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
- MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
- MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
- MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
- MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
- STD c2,80(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
- MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
- MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
- MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
- STD c3,88(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
- MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
- MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
- STD c1,96(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
- MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
- STD c2,104(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
- STD c3,112(r_ptr)
- STD c1,120(r_ptr)
-
- .EXIT
- FLDD -88(%sp),%fr13
- FLDD -96(%sp),%fr12
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-;-----------------------------------------------------------------------------
-;
-;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba4
- .proc
- .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
- .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
- .entry
- .align 64
-
- STD %r3,0(%sp) ; save r3
- STD %r4,8(%sp) ; save r4
- STD %r5,16(%sp) ; save r5
- STD %r6,24(%sp) ; save r6
-	FSTD	%fr12,32(%sp)	; save fr12
-	FSTD	%fr13,40(%sp)	; save fr13
-
- ;
- ; Zero out carries
- ;
- COPY %r0,c1
- COPY %r0,c2
- COPY %r0,c3
-
- LDO 128(%sp),%sp ; bump stack
- DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32
-
- ;
- ; Load up all of the values we are going to use
- ;
- FLDD 0(a_ptr),a0
- FLDD 8(a_ptr),a1
- FLDD 16(a_ptr),a2
- FLDD 24(a_ptr),a3
-
- FLDD 0(b_ptr),b0
- FLDD 8(b_ptr),b1
- FLDD 16(b_ptr),b2
- FLDD 24(b_ptr),b3
-
- MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
- STD c1,0(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
- STD c2,8(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
- MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
- MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
- STD c3,16(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
- MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
- MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
- MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
- STD c1,24(r_ptr)
- COPY %r0,c1
-
- MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
- MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
- MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
- STD c2,32(r_ptr)
- COPY %r0,c2
-
- MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
- MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
- STD c3,40(r_ptr)
- COPY %r0,c3
-
- MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
- STD c1,48(r_ptr)
- STD c2,56(r_ptr)
-
- .EXIT
- FLDD -88(%sp),%fr13
- FLDD -96(%sp),%fr12
- LDD -104(%sp),%r6 ; restore r6
- LDD -112(%sp),%r5 ; restore r5
- LDD -120(%sp),%r4 ; restore r4
- BVE (%rp)
- LDD,MB -128(%sp),%r3
-
- .PROCEND
-
-
- .SPACE $TEXT$
- .SUBSPA $CODE$
- .SPACE $PRIVATE$,SORT=16
- .IMPORT $global$,DATA
- .SPACE $TEXT$
- .SUBSPA $CODE$
- .SUBSPA $LIT$,ACCESS=0x2c
-C$4
- .ALIGN 8
- .STRINGZ "Division would overflow (%d)\n"
- .END
diff --git a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
index cd9926a25f..aa9f626ed2 100644
--- a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -21,7 +21,7 @@
# optimal in respect to instruction set capabilities. Fair comparison
# with vendor compiler is problematic, because OpenSSL doesn't define
# BN_LLONG [presumably] for historical reasons, which drives compiler
-# toward 4 times 16x16=32-bit multiplicatons [plus complementary
+# toward 4 times 16x16=32-bit multiplications [plus complementary
# shifts and additions] instead. This means that you should observe
# several times improvement over code generated by vendor compiler
# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
@@ -864,7 +864,7 @@ L\$copy_pa11
comiclr,= 0,$hi1,%r0
copy $ti0,$hi0
addib,<> 4,$idx,L\$copy_pa11
- stws,ma $hi0,4($rp)
+ stws,ma $hi0,4($rp)
nop ; alignment
L\$done
@@ -984,6 +984,11 @@ sub assemble {
ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
}
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+ =~ /GNU assembler/) {
+ $gnuas = 1;
+}
+
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
# flip word order in 64-bit mode...
@@ -991,7 +996,10 @@ foreach (split("\n",$code)) {
# assemble 2.0 instructions in 32-bit mode...
s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
- s/\bbv\b/bve/gm if ($SIZE_T==8);
+ s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
+ s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
+ s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
+ s/\bbv\b/bve/ if ($SIZE_T==8);
print $_,"\n";
}
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
index 9d14a12156..ec7e019a43 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -26,11 +26,21 @@
# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
# for 64-bit application running on PPC970/G5 is:
#
-# 512-bit +65%
+# 512-bit +65%
# 1024-bit +35%
# 2048-bit +18%
# 4096-bit +4%
+# September 2016
+#
+# Add multiplication procedure operating on lengths divisible by 4
+# and squaring procedure operating on lengths divisible by 8. Length
+# is expressed in number of limbs. RSA private key operations are
+# ~35-50% faster (more for longer keys) on contemporary high-end POWER
+# processors in 64-bit builds, [mysteriously enough] more in 32-bit
+# builds. On low-end 32-bit processors the performance improvement turned
+# out to be marginal...
+
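
For orientation, both new procedures compute a Montgomery product r = a*b*R^-1 mod n, with R = 2^(limb bits * num). A word-level C model, using 64-bit limbs and a 128-bit type purely for illustration (the function name, fixed buffer size and structure are not the PPC code's), looks like this; the caller still has to do the final conditional subtraction of n, which is what the .Lmul4x_post / cond_copy code below performs:

    #include <stdint.h>

    /* Montgomery product into r[0..num-1]; returns the topmost carry.
     * n0 = -n[0]^-1 mod 2^64.  Assumes num <= 64 for the local buffer. */
    static uint64_t mul_mont_ref(uint64_t *r, const uint64_t *a,
                                 const uint64_t *b, const uint64_t *n,
                                 uint64_t n0, int num)
    {
        uint64_t t[64 + 2] = {0};
        for (int i = 0; i < num; i++) {
            unsigned __int128 c = 0;
            for (int j = 0; j < num; j++) {      /* t += a[j]*b[i]       */
                c += (unsigned __int128)a[j] * b[i] + t[j];
                t[j] = (uint64_t)c;  c >>= 64;
            }
            c += t[num];
            t[num] = (uint64_t)c;  t[num + 1] = (uint64_t)(c >> 64);

            uint64_t m = t[0] * n0;              /* the "t[0]*n0" below  */
            c = (unsigned __int128)m * n[0] + t[0];
            c >>= 64;                            /* low word becomes 0   */
            for (int j = 1; j < num; j++) {      /* t = (t + m*n) / 2^64 */
                c += (unsigned __int128)m * n[j] + t[j];
                t[j - 1] = (uint64_t)c;  c >>= 64;
            }
            c += t[num];
            t[num - 1] = (uint64_t)c;
            t[num] = t[num + 1] + (uint64_t)(c >> 64);
        }
        for (int i = 0; i < num; i++)
            r[i] = t[i];
        return t[num];                           /* topmost carry        */
    }
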
$flavour = shift;
if ($flavour =~ /32/) {
@@ -49,7 +59,8 @@ if ($flavour =~ /32/) {
$UMULL= "mullw"; # unsigned multiply low
$UMULH= "mulhwu"; # unsigned multiply high
$UCMP= "cmplw"; # unsigned compare
- $SHRI= "srwi"; # unsigned shift right by immediate
+ $SHRI= "srwi"; # unsigned shift right by immediate
+ $SHLI= "slwi"; # unsigned shift left by immediate
$PUSH= $ST;
$POP= $LD;
} elsif ($flavour =~ /64/) {
@@ -69,7 +80,8 @@ if ($flavour =~ /32/) {
$UMULL= "mulld"; # unsigned multiply low
$UMULH= "mulhdu"; # unsigned multiply high
$UCMP= "cmpld"; # unsigned compare
- $SHRI= "srdi"; # unsigned shift right by immediate
+ $SHRI= "srdi"; # unsigned shift right by immediate
+ $SHLI= "sldi"; # unsigned shift left by immediate
$PUSH= $ST;
$POP= $LD;
} else { die "nonsense $flavour"; }
@@ -86,43 +98,44 @@ open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
$sp="r1";
$toc="r2";
-$rp="r3"; $ovf="r3";
+$rp="r3";
$ap="r4";
$bp="r5";
$np="r6";
$n0="r7";
$num="r8";
-$rp="r9"; # $rp is reassigned
-$aj="r10";
-$nj="r11";
-$tj="r12";
+
+{
+my $ovf=$rp;
+my $rp="r9"; # $rp is reassigned
+my $aj="r10";
+my $nj="r11";
+my $tj="r12";
# non-volatile registers
-$i="r20";
-$j="r21";
-$tp="r22";
-$m0="r23";
-$m1="r24";
-$lo0="r25";
-$hi0="r26";
-$lo1="r27";
-$hi1="r28";
-$alo="r29";
-$ahi="r30";
-$nlo="r31";
+my $i="r20";
+my $j="r21";
+my $tp="r22";
+my $m0="r23";
+my $m1="r24";
+my $lo0="r25";
+my $hi0="r26";
+my $lo1="r27";
+my $hi1="r28";
+my $alo="r29";
+my $ahi="r30";
+my $nlo="r31";
#
-$nhi="r0";
+my $nhi="r0";
$code=<<___;
.machine "any"
.text
.globl .bn_mul_mont_int
-.align 4
+.align 5
.bn_mul_mont_int:
- cmpwi $num,4
mr $rp,r3 ; $rp is reassigned
li r3,0
- bltlr
___
$code.=<<___ if ($BNSZ==4);
cmpwi $num,32 ; longer key performance is not better
@@ -334,7 +347,1641 @@ Lcopy: ; conditional copy
.byte 0,12,4,0,0x80,12,6,0
.long 0
.size .bn_mul_mont_int,.-.bn_mul_mont_int
+___
+}
+if (1) {
+my ($a0,$a1,$a2,$a3,
+ $t0,$t1,$t2,$t3,
+ $m0,$m1,$m2,$m3,
+ $acc0,$acc1,$acc2,$acc3,$acc4,
+ $bi,$mi,$tp,$ap_end,$cnt) = map("r$_",(9..12,14..31));
+my ($carry,$zero) = ($rp,"r0");
+
+# sp----------->+-------------------------------+
+# | saved sp |
+# +-------------------------------+
+# . .
+# +8*size_t +-------------------------------+
+# | 4 "n0*t0" |
+# . .
+# . .
+# +12*size_t +-------------------------------+
+# | size_t tmp[num] |
+# . .
+# . .
+# . .
+# +-------------------------------+
+# | topmost carry |
+# . .
+# -18*size_t +-------------------------------+
+# | 18 saved gpr, r14-r31 |
+# . .
+# . .
+# +-------------------------------+
+$code.=<<___;
+.globl .bn_mul4x_mont_int
+.align 5
+.bn_mul4x_mont_int:
+ andi. r0,$num,7
+ bne .Lmul4x_do
+ $UCMP $ap,$bp
+ bne .Lmul4x_do
+ b .Lsqr8x_do
+.Lmul4x_do:
+ slwi $num,$num,`log($SIZE_T)/log(2)`
+ mr $a0,$sp
+ li $a1,-32*$SIZE_T
+ sub $a1,$a1,$num
+ $STUX $sp,$sp,$a1 # alloca
+
+ $PUSH r14,-$SIZE_T*18($a0)
+ $PUSH r15,-$SIZE_T*17($a0)
+ $PUSH r16,-$SIZE_T*16($a0)
+ $PUSH r17,-$SIZE_T*15($a0)
+ $PUSH r18,-$SIZE_T*14($a0)
+ $PUSH r19,-$SIZE_T*13($a0)
+ $PUSH r20,-$SIZE_T*12($a0)
+ $PUSH r21,-$SIZE_T*11($a0)
+ $PUSH r22,-$SIZE_T*10($a0)
+ $PUSH r23,-$SIZE_T*9($a0)
+ $PUSH r24,-$SIZE_T*8($a0)
+ $PUSH r25,-$SIZE_T*7($a0)
+ $PUSH r26,-$SIZE_T*6($a0)
+ $PUSH r27,-$SIZE_T*5($a0)
+ $PUSH r28,-$SIZE_T*4($a0)
+ $PUSH r29,-$SIZE_T*3($a0)
+ $PUSH r30,-$SIZE_T*2($a0)
+ $PUSH r31,-$SIZE_T*1($a0)
+
+ subi $ap,$ap,$SIZE_T # bias by -1
+ subi $np,$np,$SIZE_T # bias by -1
+ subi $rp,$rp,$SIZE_T # bias by -1
+ $LD $n0,0($n0) # *n0
+
+ add $t0,$bp,$num
+ add $ap_end,$ap,$num
+ subi $t0,$t0,$SIZE_T*4 # &b[num-4]
+
+ $LD $bi,$SIZE_T*0($bp) # b[0]
+ li $acc0,0
+ $LD $a0,$SIZE_T*1($ap) # a[0..3]
+ li $acc1,0
+ $LD $a1,$SIZE_T*2($ap)
+ li $acc2,0
+ $LD $a2,$SIZE_T*3($ap)
+ li $acc3,0
+ $LDU $a3,$SIZE_T*4($ap)
+ $LD $m0,$SIZE_T*1($np) # n[0..3]
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+
+ $PUSH $rp,$SIZE_T*6($sp) # offload rp and &b[num-4]
+ $PUSH $t0,$SIZE_T*7($sp)
+ li $carry,0
+ addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit
+ li $cnt,0
+ li $zero,0
+ b .Loop_mul4x_1st_reduction
+
+.align 5
+.Loop_mul4x_1st_reduction:
+ $UMULL $t0,$a0,$bi # lo(a[0..3]*b[0])
+ addze $carry,$carry # modulo-scheduled
+ $UMULL $t1,$a1,$bi
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$bi
+ andi. $cnt,$cnt,$SIZE_T*4-1
+ $UMULL $t3,$a3,$bi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$a0,$bi # hi(a[0..3]*b[0])
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$a1,$bi
+ adde $acc2,$acc2,$t2
+ $UMULL $mi,$acc0,$n0 # t[0]*n0
+ adde $acc3,$acc3,$t3
+ $UMULH $t2,$a2,$bi
+ addze $acc4,$zero
+ $UMULH $t3,$a3,$bi
+ $LDX $bi,$bp,$cnt # next b[i] (or b[0])
+ addc $acc1,$acc1,$t0
+ # (*) mul $t0,$m0,$mi # lo(n[0..3]*t[0]*n0)
+ $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing
+ adde $acc2,$acc2,$t1
+ $UMULL $t1,$m1,$mi
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$m2,$mi
+ adde $acc4,$acc4,$t3 # can't overflow
+ $UMULL $t3,$m3,$mi
+ # (*) addc $acc0,$acc0,$t0
+ # (*) As for removal of first multiplication and addition
+ # instructions. The outcome of first addition is
+ # guaranteed to be zero, which leaves two computationally
+ # significant outcomes: it either carries or not. Then
+ # question is when does it carry? Is there alternative
+ # way to deduce it? If you follow operations, you can
+ # observe that condition for carry is quite simple:
+ # $acc0 being non-zero. So that carry can be calculated
+ # by adding -1 to $acc0. That's what next instruction does.
+ addic $acc0,$acc0,-1 # (*), discarded
+ $UMULH $t0,$m0,$mi # hi(n[0..3]*t[0]*n0)
+ adde $acc0,$acc1,$t1
+ $UMULH $t1,$m1,$mi
+ adde $acc1,$acc2,$t2
+ $UMULH $t2,$m2,$mi
+ adde $acc2,$acc3,$t3
+ $UMULH $t3,$m3,$mi
+ adde $acc3,$acc4,$carry
+ addze $carry,$zero
+ addc $acc0,$acc0,$t0
+ adde $acc1,$acc1,$t1
+ adde $acc2,$acc2,$t2
+ adde $acc3,$acc3,$t3
+ #addze $carry,$carry
+ bne .Loop_mul4x_1st_reduction
+
+ $UCMP $ap_end,$ap
+ beq .Lmul4x4_post_condition
+
+ $LD $a0,$SIZE_T*1($ap) # a[4..7]
+ $LD $a1,$SIZE_T*2($ap)
+ $LD $a2,$SIZE_T*3($ap)
+ $LDU $a3,$SIZE_T*4($ap)
+ $LD $mi,$SIZE_T*8($sp) # a[0]*n0
+ $LD $m0,$SIZE_T*1($np) # n[4..7]
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Loop_mul4x_1st_tail:
+ $UMULL $t0,$a0,$bi # lo(a[4..7]*b[i])
+ addze $carry,$carry # modulo-scheduled
+ $UMULL $t1,$a1,$bi
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$bi
+ andi. $cnt,$cnt,$SIZE_T*4-1
+ $UMULL $t3,$a3,$bi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$a0,$bi # hi(a[4..7]*b[i])
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$a1,$bi
+ adde $acc2,$acc2,$t2
+ $UMULH $t2,$a2,$bi
+ adde $acc3,$acc3,$t3
+ $UMULH $t3,$a3,$bi
+ addze $acc4,$zero
+ $LDX $bi,$bp,$cnt # next b[i] (or b[0])
+ addc $acc1,$acc1,$t0
+ $UMULL $t0,$m0,$mi # lo(n[4..7]*a[0]*n0)
+ adde $acc2,$acc2,$t1
+ $UMULL $t1,$m1,$mi
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$m2,$mi
+ adde $acc4,$acc4,$t3 # can't overflow
+ $UMULL $t3,$m3,$mi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$m0,$mi # hi(n[4..7]*a[0]*n0)
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$m1,$mi
+ adde $acc2,$acc2,$t2
+ $UMULH $t2,$m2,$mi
+ adde $acc3,$acc3,$t3
+ adde $acc4,$acc4,$carry
+ $UMULH $t3,$m3,$mi
+ addze $carry,$zero
+ addi $mi,$sp,$SIZE_T*8
+ $LDX $mi,$mi,$cnt # next t[0]*n0
+ $STU $acc0,$SIZE_T($tp) # word of result
+ addc $acc0,$acc1,$t0
+ adde $acc1,$acc2,$t1
+ adde $acc2,$acc3,$t2
+ adde $acc3,$acc4,$t3
+ #addze $carry,$carry
+ bne .Loop_mul4x_1st_tail
+
+	sub	$t1,$ap_end,$num	# rewound $ap
+ $UCMP $ap_end,$ap # done yet?
+ beq .Lmul4x_proceed
+
+ $LD $a0,$SIZE_T*1($ap)
+ $LD $a1,$SIZE_T*2($ap)
+ $LD $a2,$SIZE_T*3($ap)
+ $LDU $a3,$SIZE_T*4($ap)
+ $LD $m0,$SIZE_T*1($np)
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Lmul4x_proceed:
+ $LDU $bi,$SIZE_T*4($bp) # *++b
+ addze $carry,$carry # topmost carry
+ $LD $a0,$SIZE_T*1($t1)
+ $LD $a1,$SIZE_T*2($t1)
+ $LD $a2,$SIZE_T*3($t1)
+ $LD $a3,$SIZE_T*4($t1)
+ addi $ap,$t1,$SIZE_T*4
+ sub $np,$np,$num # rewind np
+
+ $ST $acc0,$SIZE_T*1($tp) # result
+ $ST $acc1,$SIZE_T*2($tp)
+ $ST $acc2,$SIZE_T*3($tp)
+ $ST $acc3,$SIZE_T*4($tp)
+ $ST $carry,$SIZE_T*5($tp) # save topmost carry
+ $LD $acc0,$SIZE_T*12($sp) # t[0..3]
+ $LD $acc1,$SIZE_T*13($sp)
+ $LD $acc2,$SIZE_T*14($sp)
+ $LD $acc3,$SIZE_T*15($sp)
+
+ $LD $m0,$SIZE_T*1($np) # n[0..3]
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+ addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit
+ li $carry,0
+ b .Loop_mul4x_reduction
+
+.align 5
+.Loop_mul4x_reduction:
+ $UMULL $t0,$a0,$bi # lo(a[0..3]*b[4])
+ addze $carry,$carry # modulo-scheduled
+ $UMULL $t1,$a1,$bi
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$bi
+ andi. $cnt,$cnt,$SIZE_T*4-1
+ $UMULL $t3,$a3,$bi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$a0,$bi # hi(a[0..3]*b[4])
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$a1,$bi
+ adde $acc2,$acc2,$t2
+ $UMULL $mi,$acc0,$n0 # t[0]*n0
+ adde $acc3,$acc3,$t3
+ $UMULH $t2,$a2,$bi
+ addze $acc4,$zero
+ $UMULH $t3,$a3,$bi
+ $LDX $bi,$bp,$cnt # next b[i]
+ addc $acc1,$acc1,$t0
+ # (*) mul $t0,$m0,$mi
+ $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing
+ adde $acc2,$acc2,$t1
+	$UMULL	$t1,$m1,$mi		# lo(n[0..3]*t[0]*n0)
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$m2,$mi
+ adde $acc4,$acc4,$t3 # can't overflow
+ $UMULL $t3,$m3,$mi
+ # (*) addc $acc0,$acc0,$t0
+ addic $acc0,$acc0,-1 # (*), discarded
+	$UMULH	$t0,$m0,$mi		# hi(n[0..3]*t[0]*n0)
+ adde $acc0,$acc1,$t1
+ $UMULH $t1,$m1,$mi
+ adde $acc1,$acc2,$t2
+ $UMULH $t2,$m2,$mi
+ adde $acc2,$acc3,$t3
+ $UMULH $t3,$m3,$mi
+ adde $acc3,$acc4,$carry
+ addze $carry,$zero
+ addc $acc0,$acc0,$t0
+ adde $acc1,$acc1,$t1
+ adde $acc2,$acc2,$t2
+ adde $acc3,$acc3,$t3
+ #addze $carry,$carry
+ bne .Loop_mul4x_reduction
+
+ $LD $t0,$SIZE_T*5($tp) # t[4..7]
+ addze $carry,$carry
+ $LD $t1,$SIZE_T*6($tp)
+ $LD $t2,$SIZE_T*7($tp)
+ $LD $t3,$SIZE_T*8($tp)
+ $LD $a0,$SIZE_T*1($ap) # a[4..7]
+ $LD $a1,$SIZE_T*2($ap)
+ $LD $a2,$SIZE_T*3($ap)
+ $LDU $a3,$SIZE_T*4($ap)
+ addc $acc0,$acc0,$t0
+ adde $acc1,$acc1,$t1
+ adde $acc2,$acc2,$t2
+ adde $acc3,$acc3,$t3
+ #addze $carry,$carry
+
+ $LD $mi,$SIZE_T*8($sp) # t[0]*n0
+ $LD $m0,$SIZE_T*1($np) # n[4..7]
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_tail:
+ $UMULL $t0,$a0,$bi # lo(a[4..7]*b[4])
+ addze $carry,$carry # modulo-scheduled
+ $UMULL $t1,$a1,$bi
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$bi
+ andi. $cnt,$cnt,$SIZE_T*4-1
+ $UMULL $t3,$a3,$bi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$a0,$bi # hi(a[4..7]*b[4])
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$a1,$bi
+ adde $acc2,$acc2,$t2
+ $UMULH $t2,$a2,$bi
+ adde $acc3,$acc3,$t3
+ $UMULH $t3,$a3,$bi
+ addze $acc4,$zero
+ $LDX $bi,$bp,$cnt # next b[i]
+ addc $acc1,$acc1,$t0
+ $UMULL $t0,$m0,$mi # lo(n[4..7]*t[0]*n0)
+ adde $acc2,$acc2,$t1
+ $UMULL $t1,$m1,$mi
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$m2,$mi
+ adde $acc4,$acc4,$t3 # can't overflow
+ $UMULL $t3,$m3,$mi
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$m0,$mi # hi(n[4..7]*t[0]*n0)
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$m1,$mi
+ adde $acc2,$acc2,$t2
+ $UMULH $t2,$m2,$mi
+ adde $acc3,$acc3,$t3
+ $UMULH $t3,$m3,$mi
+ adde $acc4,$acc4,$carry
+ addi $mi,$sp,$SIZE_T*8
+ $LDX $mi,$mi,$cnt # next a[0]*n0
+ addze $carry,$zero
+ $STU $acc0,$SIZE_T($tp) # word of result
+ addc $acc0,$acc1,$t0
+ adde $acc1,$acc2,$t1
+ adde $acc2,$acc3,$t2
+ adde $acc3,$acc4,$t3
+ #addze $carry,$carry
+ bne .Loop_mul4x_tail
+
+ $LD $t0,$SIZE_T*5($tp) # next t[i] or topmost carry
+	sub	$t1,$np,$num		# rewound np?
+ addze $carry,$carry
+ $UCMP $ap_end,$ap # done yet?
+ beq .Loop_mul4x_break
+
+ $LD $t1,$SIZE_T*6($tp)
+ $LD $t2,$SIZE_T*7($tp)
+ $LD $t3,$SIZE_T*8($tp)
+ $LD $a0,$SIZE_T*1($ap)
+ $LD $a1,$SIZE_T*2($ap)
+ $LD $a2,$SIZE_T*3($ap)
+ $LDU $a3,$SIZE_T*4($ap)
+ addc $acc0,$acc0,$t0
+ adde $acc1,$acc1,$t1
+ adde $acc2,$acc2,$t2
+ adde $acc3,$acc3,$t3
+ #addze $carry,$carry
+
+ $LD $m0,$SIZE_T*1($np) # n[4..7]
+ $LD $m1,$SIZE_T*2($np)
+ $LD $m2,$SIZE_T*3($np)
+ $LDU $m3,$SIZE_T*4($np)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_break:
+ $POP $t2,$SIZE_T*6($sp) # pull rp and &b[num-4]
+ $POP $t3,$SIZE_T*7($sp)
+ addc $a0,$acc0,$t0 # accumulate topmost carry
+ $LD $acc0,$SIZE_T*12($sp) # t[0..3]
+ addze $a1,$acc1
+ $LD $acc1,$SIZE_T*13($sp)
+ addze $a2,$acc2
+ $LD $acc2,$SIZE_T*14($sp)
+ addze $a3,$acc3
+ $LD $acc3,$SIZE_T*15($sp)
+ addze $carry,$carry # topmost carry
+ $ST $a0,$SIZE_T*1($tp) # result
+ sub $ap,$ap_end,$num # rewind ap
+ $ST $a1,$SIZE_T*2($tp)
+ $ST $a2,$SIZE_T*3($tp)
+ $ST $a3,$SIZE_T*4($tp)
+ $ST $carry,$SIZE_T*5($tp) # store topmost carry
+
+ $LD $m0,$SIZE_T*1($t1) # n[0..3]
+ $LD $m1,$SIZE_T*2($t1)
+ $LD $m2,$SIZE_T*3($t1)
+ $LD $m3,$SIZE_T*4($t1)
+ addi $np,$t1,$SIZE_T*4
+ $UCMP $bp,$t3 # done yet?
+ beq .Lmul4x_post
+
+ $LDU $bi,$SIZE_T*4($bp)
+ $LD $a0,$SIZE_T*1($ap) # a[0..3]
+ $LD $a1,$SIZE_T*2($ap)
+ $LD $a2,$SIZE_T*3($ap)
+ $LDU $a3,$SIZE_T*4($ap)
+ li $carry,0
+ addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit
+ b .Loop_mul4x_reduction
+
+.align 5
+.Lmul4x_post:
+ # Final step. We see if result is larger than modulus, and
+ # if it is, subtract the modulus. But comparison implies
+ # subtraction. So we subtract modulus, see if it borrowed,
+ # and conditionally copy original value.
+ srwi $cnt,$num,`log($SIZE_T)/log(2)+2`
+ mr $bp,$t2 # &rp[-1]
+ subi $cnt,$cnt,1
+ mr $ap_end,$t2 # &rp[-1] copy
+ subfc $t0,$m0,$acc0
+ addi $tp,$sp,$SIZE_T*15
+ subfe $t1,$m1,$acc1
+
+ mtctr $cnt
+.Lmul4x_sub:
+ $LD $m0,$SIZE_T*1($np)
+ $LD $acc0,$SIZE_T*1($tp)
+ subfe $t2,$m2,$acc2
+ $LD $m1,$SIZE_T*2($np)
+ $LD $acc1,$SIZE_T*2($tp)
+ subfe $t3,$m3,$acc3
+ $LD $m2,$SIZE_T*3($np)
+ $LD $acc2,$SIZE_T*3($tp)
+ $LDU $m3,$SIZE_T*4($np)
+ $LDU $acc3,$SIZE_T*4($tp)
+ $ST $t0,$SIZE_T*1($bp)
+ $ST $t1,$SIZE_T*2($bp)
+ subfe $t0,$m0,$acc0
+ $ST $t2,$SIZE_T*3($bp)
+ $STU $t3,$SIZE_T*4($bp)
+ subfe $t1,$m1,$acc1
+ bdnz .Lmul4x_sub
+
+ $LD $a0,$SIZE_T*1($ap_end)
+ $ST $t0,$SIZE_T*1($bp)
+ $LD $t0,$SIZE_T*12($sp)
+ subfe $t2,$m2,$acc2
+ $LD $a1,$SIZE_T*2($ap_end)
+ $ST $t1,$SIZE_T*2($bp)
+ $LD $t1,$SIZE_T*13($sp)
+ subfe $t3,$m3,$acc3
+ subfe $carry,$zero,$carry # did it borrow?
+ addi $tp,$sp,$SIZE_T*12
+ $LD $a2,$SIZE_T*3($ap_end)
+ $ST $t2,$SIZE_T*3($bp)
+ $LD $t2,$SIZE_T*14($sp)
+ $LD $a3,$SIZE_T*4($ap_end)
+ $ST $t3,$SIZE_T*4($bp)
+ $LD $t3,$SIZE_T*15($sp)
+
+ mtctr $cnt
+.Lmul4x_cond_copy:
+ and $t0,$t0,$carry
+ andc $a0,$a0,$carry
+ $ST $zero,$SIZE_T*0($tp) # wipe stack clean
+ and $t1,$t1,$carry
+ andc $a1,$a1,$carry
+ $ST $zero,$SIZE_T*1($tp)
+ and $t2,$t2,$carry
+ andc $a2,$a2,$carry
+ $ST $zero,$SIZE_T*2($tp)
+ and $t3,$t3,$carry
+ andc $a3,$a3,$carry
+ $ST $zero,$SIZE_T*3($tp)
+ or $acc0,$t0,$a0
+ $LD $a0,$SIZE_T*5($ap_end)
+ $LD $t0,$SIZE_T*4($tp)
+ or $acc1,$t1,$a1
+ $LD $a1,$SIZE_T*6($ap_end)
+ $LD $t1,$SIZE_T*5($tp)
+ or $acc2,$t2,$a2
+ $LD $a2,$SIZE_T*7($ap_end)
+ $LD $t2,$SIZE_T*6($tp)
+ or $acc3,$t3,$a3
+ $LD $a3,$SIZE_T*8($ap_end)
+ $LD $t3,$SIZE_T*7($tp)
+ addi $tp,$tp,$SIZE_T*4
+ $ST $acc0,$SIZE_T*1($ap_end)
+ $ST $acc1,$SIZE_T*2($ap_end)
+ $ST $acc2,$SIZE_T*3($ap_end)
+ $STU $acc3,$SIZE_T*4($ap_end)
+ bdnz .Lmul4x_cond_copy
+
+ $POP $bp,0($sp) # pull saved sp
+ and $t0,$t0,$carry
+ andc $a0,$a0,$carry
+ $ST $zero,$SIZE_T*0($tp)
+ and $t1,$t1,$carry
+ andc $a1,$a1,$carry
+ $ST $zero,$SIZE_T*1($tp)
+ and $t2,$t2,$carry
+ andc $a2,$a2,$carry
+ $ST $zero,$SIZE_T*2($tp)
+ and $t3,$t3,$carry
+ andc $a3,$a3,$carry
+ $ST $zero,$SIZE_T*3($tp)
+ or $acc0,$t0,$a0
+ or $acc1,$t1,$a1
+ $ST $zero,$SIZE_T*4($tp)
+ or $acc2,$t2,$a2
+ or $acc3,$t3,$a3
+ $ST $acc0,$SIZE_T*1($ap_end)
+ $ST $acc1,$SIZE_T*2($ap_end)
+ $ST $acc2,$SIZE_T*3($ap_end)
+ $ST $acc3,$SIZE_T*4($ap_end)
+
+ b .Lmul4x_done
+
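
The cond_copy loop above avoids a branch by expanding the final borrow into an all-ones or all-zero mask and blending with and/andc/or; the underlying idiom in C (illustrative only):

    #include <stdint.h>

    /* Branch-free select: returns x where mask is all-ones,
     * y where mask is all-zero. */
    static uint64_t masked_select(uint64_t x, uint64_t y, uint64_t mask)
    {
        return (x & mask) | (y & ~mask);
    }
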
+.align 4
+.Lmul4x4_post_condition:
+ $POP $ap,$SIZE_T*6($sp) # pull &rp[-1]
+ $POP $bp,0($sp) # pull saved sp
+ addze $carry,$carry # modulo-scheduled
+ # $acc0-3,$carry hold result, $m0-3 hold modulus
+ subfc $a0,$m0,$acc0
+ subfe $a1,$m1,$acc1
+ subfe $a2,$m2,$acc2
+ subfe $a3,$m3,$acc3
+ subfe $carry,$zero,$carry # did it borrow?
+
+ and $m0,$m0,$carry
+ and $m1,$m1,$carry
+ addc $a0,$a0,$m0
+ and $m2,$m2,$carry
+ adde $a1,$a1,$m1
+ and $m3,$m3,$carry
+ adde $a2,$a2,$m2
+ adde $a3,$a3,$m3
+
+ $ST $a0,$SIZE_T*1($ap) # write result
+ $ST $a1,$SIZE_T*2($ap)
+ $ST $a2,$SIZE_T*3($ap)
+ $ST $a3,$SIZE_T*4($ap)
+
+.Lmul4x_done:
+ $ST $zero,$SIZE_T*8($sp) # wipe stack clean
+ $ST $zero,$SIZE_T*9($sp)
+ $ST $zero,$SIZE_T*10($sp)
+ $ST $zero,$SIZE_T*11($sp)
+ li r3,1 # signal "done"
+ $POP r14,-$SIZE_T*18($bp)
+ $POP r15,-$SIZE_T*17($bp)
+ $POP r16,-$SIZE_T*16($bp)
+ $POP r17,-$SIZE_T*15($bp)
+ $POP r18,-$SIZE_T*14($bp)
+ $POP r19,-$SIZE_T*13($bp)
+ $POP r20,-$SIZE_T*12($bp)
+ $POP r21,-$SIZE_T*11($bp)
+ $POP r22,-$SIZE_T*10($bp)
+ $POP r23,-$SIZE_T*9($bp)
+ $POP r24,-$SIZE_T*8($bp)
+ $POP r25,-$SIZE_T*7($bp)
+ $POP r26,-$SIZE_T*6($bp)
+ $POP r27,-$SIZE_T*5($bp)
+ $POP r28,-$SIZE_T*4($bp)
+ $POP r29,-$SIZE_T*3($bp)
+ $POP r30,-$SIZE_T*2($bp)
+ $POP r31,-$SIZE_T*1($bp)
+ mr $sp,$bp
+ blr
+ .long 0
+ .byte 0,12,4,0x20,0x80,18,6,0
+ .long 0
+.size .bn_mul4x_mont_int,.-.bn_mul4x_mont_int
+___
+}
+
+if (1) {
+########################################################################
+# Following is PPC adaptation of sqrx8x_mont from x86_64-mont5 module.
+
+my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("r$_",(9..12,14..17));
+my ($t0,$t1,$t2,$t3)=map("r$_",(18..21));
+my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("r$_",(22..29));
+my ($cnt,$carry,$zero)=("r30","r31","r0");
+my ($tp,$ap_end,$na0)=($bp,$np,$carry);
+
+# sp----------->+-------------------------------+
+# | saved sp |
+# +-------------------------------+
+# . .
+# +12*size_t +-------------------------------+
+# | size_t tmp[2*num] |
+# . .
+# . .
+# . .
+# +-------------------------------+
+# . .
+# -18*size_t +-------------------------------+
+# | 18 saved gpr, r14-r31 |
+# . .
+# . .
+# +-------------------------------+
+$code.=<<___;
+.align 5
+__bn_sqr8x_mont:
+.Lsqr8x_do:
+ mr $a0,$sp
+ slwi $a1,$num,`log($SIZE_T)/log(2)+1`
+ li $a2,-32*$SIZE_T
+ sub $a1,$a2,$a1
+ slwi $num,$num,`log($SIZE_T)/log(2)`
+ $STUX $sp,$sp,$a1 # alloca
+
+ $PUSH r14,-$SIZE_T*18($a0)
+ $PUSH r15,-$SIZE_T*17($a0)
+ $PUSH r16,-$SIZE_T*16($a0)
+ $PUSH r17,-$SIZE_T*15($a0)
+ $PUSH r18,-$SIZE_T*14($a0)
+ $PUSH r19,-$SIZE_T*13($a0)
+ $PUSH r20,-$SIZE_T*12($a0)
+ $PUSH r21,-$SIZE_T*11($a0)
+ $PUSH r22,-$SIZE_T*10($a0)
+ $PUSH r23,-$SIZE_T*9($a0)
+ $PUSH r24,-$SIZE_T*8($a0)
+ $PUSH r25,-$SIZE_T*7($a0)
+ $PUSH r26,-$SIZE_T*6($a0)
+ $PUSH r27,-$SIZE_T*5($a0)
+ $PUSH r28,-$SIZE_T*4($a0)
+ $PUSH r29,-$SIZE_T*3($a0)
+ $PUSH r30,-$SIZE_T*2($a0)
+ $PUSH r31,-$SIZE_T*1($a0)
+ subi $ap,$ap,$SIZE_T # bias by -1
+ subi $t0,$np,$SIZE_T # bias by -1
+ subi $rp,$rp,$SIZE_T # bias by -1
+ $LD $n0,0($n0) # *n0
+ li $zero,0
+
+ add $ap_end,$ap,$num
+ $LD $a0,$SIZE_T*1($ap)
+ #li $acc0,0
+ $LD $a1,$SIZE_T*2($ap)
+ li $acc1,0
+ $LD $a2,$SIZE_T*3($ap)
+ li $acc2,0
+ $LD $a3,$SIZE_T*4($ap)
+ li $acc3,0
+ $LD $a4,$SIZE_T*5($ap)
+ li $acc4,0
+ $LD $a5,$SIZE_T*6($ap)
+ li $acc5,0
+ $LD $a6,$SIZE_T*7($ap)
+ li $acc6,0
+ $LDU $a7,$SIZE_T*8($ap)
+ li $acc7,0
+
+ addi $tp,$sp,$SIZE_T*11 # &tp[-1]
+ subic. $cnt,$num,$SIZE_T*8
+ b .Lsqr8x_zero_start
+
+.align 5
+.Lsqr8x_zero:
+ subic. $cnt,$cnt,$SIZE_T*8
+ $ST $zero,$SIZE_T*1($tp)
+ $ST $zero,$SIZE_T*2($tp)
+ $ST $zero,$SIZE_T*3($tp)
+ $ST $zero,$SIZE_T*4($tp)
+ $ST $zero,$SIZE_T*5($tp)
+ $ST $zero,$SIZE_T*6($tp)
+ $ST $zero,$SIZE_T*7($tp)
+ $ST $zero,$SIZE_T*8($tp)
+.Lsqr8x_zero_start:
+ $ST $zero,$SIZE_T*9($tp)
+ $ST $zero,$SIZE_T*10($tp)
+ $ST $zero,$SIZE_T*11($tp)
+ $ST $zero,$SIZE_T*12($tp)
+ $ST $zero,$SIZE_T*13($tp)
+ $ST $zero,$SIZE_T*14($tp)
+ $ST $zero,$SIZE_T*15($tp)
+ $STU $zero,$SIZE_T*16($tp)
+ bne .Lsqr8x_zero
+
+ $PUSH $rp,$SIZE_T*6($sp) # offload &rp[-1]
+ $PUSH $t0,$SIZE_T*7($sp) # offload &np[-1]
+ $PUSH $n0,$SIZE_T*8($sp) # offload n0
+ $PUSH $tp,$SIZE_T*9($sp) # &tp[2*num-1]
+ $PUSH $zero,$SIZE_T*10($sp) # initial top-most carry
+ addi $tp,$sp,$SIZE_T*11 # &tp[-1]
+
+ # Multiply everything but a[i]*a[i]
+.align 5
+.Lsqr8x_outer_loop:
+ # a[1]a[0] (i)
+ # a[2]a[0]
+ # a[3]a[0]
+ # a[4]a[0]
+ # a[5]a[0]
+ # a[6]a[0]
+ # a[7]a[0]
+ # a[2]a[1] (ii)
+ # a[3]a[1]
+ # a[4]a[1]
+ # a[5]a[1]
+ # a[6]a[1]
+ # a[7]a[1]
+ # a[3]a[2] (iii)
+ # a[4]a[2]
+ # a[5]a[2]
+ # a[6]a[2]
+ # a[7]a[2]
+ # a[4]a[3] (iv)
+ # a[5]a[3]
+ # a[6]a[3]
+ # a[7]a[3]
+ # a[5]a[4] (v)
+ # a[6]a[4]
+ # a[7]a[4]
+ # a[6]a[5] (vi)
+ # a[7]a[5]
+ # a[7]a[6] (vii)
+
+ $UMULL $t0,$a1,$a0 # lo(a[1..7]*a[0]) (i)
+ $UMULL $t1,$a2,$a0
+ $UMULL $t2,$a3,$a0
+ $UMULL $t3,$a4,$a0
+ addc $acc1,$acc1,$t0 # t[1]+lo(a[1]*a[0])
+ $UMULL $t0,$a5,$a0
+ adde $acc2,$acc2,$t1
+ $UMULL $t1,$a6,$a0
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$a7,$a0
+ adde $acc4,$acc4,$t3
+ $UMULH $t3,$a1,$a0 # hi(a[1..7]*a[0])
+ adde $acc5,$acc5,$t0
+ $UMULH $t0,$a2,$a0
+ adde $acc6,$acc6,$t1
+ $UMULH $t1,$a3,$a0
+ adde $acc7,$acc7,$t2
+ $UMULH $t2,$a4,$a0
+ $ST $acc0,$SIZE_T*1($tp) # t[0]
+ addze $acc0,$zero # t[8]
+ $ST $acc1,$SIZE_T*2($tp) # t[1]
+ addc $acc2,$acc2,$t3 # t[2]+lo(a[1]*a[0])
+ $UMULH $t3,$a5,$a0
+ adde $acc3,$acc3,$t0
+ $UMULH $t0,$a6,$a0
+ adde $acc4,$acc4,$t1
+ $UMULH $t1,$a7,$a0
+ adde $acc5,$acc5,$t2
+ $UMULL $t2,$a2,$a1 # lo(a[2..7]*a[1]) (ii)
+ adde $acc6,$acc6,$t3
+ $UMULL $t3,$a3,$a1
+ adde $acc7,$acc7,$t0
+ $UMULL $t0,$a4,$a1
+ adde $acc0,$acc0,$t1
+
+ $UMULL $t1,$a5,$a1
+ addc $acc3,$acc3,$t2
+ $UMULL $t2,$a6,$a1
+ adde $acc4,$acc4,$t3
+ $UMULL $t3,$a7,$a1
+ adde $acc5,$acc5,$t0
+ $UMULH $t0,$a2,$a1 # hi(a[2..7]*a[1])
+ adde $acc6,$acc6,$t1
+ $UMULH $t1,$a3,$a1
+ adde $acc7,$acc7,$t2
+ $UMULH $t2,$a4,$a1
+ adde $acc0,$acc0,$t3
+ $UMULH $t3,$a5,$a1
+ $ST $acc2,$SIZE_T*3($tp) # t[2]
+ addze $acc1,$zero # t[9]
+ $ST $acc3,$SIZE_T*4($tp) # t[3]
+ addc $acc4,$acc4,$t0
+ $UMULH $t0,$a6,$a1
+ adde $acc5,$acc5,$t1
+ $UMULH $t1,$a7,$a1
+ adde $acc6,$acc6,$t2
+ $UMULL $t2,$a3,$a2 # lo(a[3..7]*a[2]) (iii)
+ adde $acc7,$acc7,$t3
+ $UMULL $t3,$a4,$a2
+ adde $acc0,$acc0,$t0
+ $UMULL $t0,$a5,$a2
+ adde $acc1,$acc1,$t1
+
+ $UMULL $t1,$a6,$a2
+ addc $acc5,$acc5,$t2
+ $UMULL $t2,$a7,$a2
+ adde $acc6,$acc6,$t3
+ $UMULH $t3,$a3,$a2 # hi(a[3..7]*a[2])
+ adde $acc7,$acc7,$t0
+ $UMULH $t0,$a4,$a2
+ adde $acc0,$acc0,$t1
+ $UMULH $t1,$a5,$a2
+ adde $acc1,$acc1,$t2
+ $UMULH $t2,$a6,$a2
+ $ST $acc4,$SIZE_T*5($tp) # t[4]
+ addze $acc2,$zero # t[10]
+ $ST $acc5,$SIZE_T*6($tp) # t[5]
+ addc $acc6,$acc6,$t3
+ $UMULH $t3,$a7,$a2
+ adde $acc7,$acc7,$t0
+ $UMULL $t0,$a4,$a3 # lo(a[4..7]*a[3]) (iv)
+ adde $acc0,$acc0,$t1
+ $UMULL $t1,$a5,$a3
+ adde $acc1,$acc1,$t2
+ $UMULL $t2,$a6,$a3
+ adde $acc2,$acc2,$t3
+
+ $UMULL $t3,$a7,$a3
+ addc $acc7,$acc7,$t0
+ $UMULH $t0,$a4,$a3 # hi(a[4..7]*a[3])
+ adde $acc0,$acc0,$t1
+ $UMULH $t1,$a5,$a3
+ adde $acc1,$acc1,$t2
+ $UMULH $t2,$a6,$a3
+ adde $acc2,$acc2,$t3
+ $UMULH $t3,$a7,$a3
+ $ST $acc6,$SIZE_T*7($tp) # t[6]
+ addze $acc3,$zero # t[11]
+ $STU $acc7,$SIZE_T*8($tp) # t[7]
+ addc $acc0,$acc0,$t0
+ $UMULL $t0,$a5,$a4 # lo(a[5..7]*a[4]) (v)
+ adde $acc1,$acc1,$t1
+ $UMULL $t1,$a6,$a4
+ adde $acc2,$acc2,$t2
+ $UMULL $t2,$a7,$a4
+ adde $acc3,$acc3,$t3
+
+ $UMULH $t3,$a5,$a4 # hi(a[5..7]*a[4])
+ addc $acc1,$acc1,$t0
+ $UMULH $t0,$a6,$a4
+ adde $acc2,$acc2,$t1
+ $UMULH $t1,$a7,$a4
+ adde $acc3,$acc3,$t2
+ $UMULL $t2,$a6,$a5 # lo(a[6..7]*a[5]) (vi)
+ addze $acc4,$zero # t[12]
+ addc $acc2,$acc2,$t3
+ $UMULL $t3,$a7,$a5
+ adde $acc3,$acc3,$t0
+ $UMULH $t0,$a6,$a5 # hi(a[6..7]*a[5])
+ adde $acc4,$acc4,$t1
+
+ $UMULH $t1,$a7,$a5
+ addc $acc3,$acc3,$t2
+ $UMULL $t2,$a7,$a6 # lo(a[7]*a[6]) (vii)
+ adde $acc4,$acc4,$t3
+ $UMULH $t3,$a7,$a6 # hi(a[7]*a[6])
+ addze $acc5,$zero # t[13]
+ addc $acc4,$acc4,$t0
+ $UCMP $ap_end,$ap # done yet?
+ adde $acc5,$acc5,$t1
+
+ addc $acc5,$acc5,$t2
+	sub	$t0,$ap_end,$num	# rewound ap
+ addze $acc6,$zero # t[14]
+ add $acc6,$acc6,$t3
+
+ beq .Lsqr8x_outer_break
+
+ mr $n0,$a0
+ $LD $a0,$SIZE_T*1($tp)
+ $LD $a1,$SIZE_T*2($tp)
+ $LD $a2,$SIZE_T*3($tp)
+ $LD $a3,$SIZE_T*4($tp)
+ $LD $a4,$SIZE_T*5($tp)
+ $LD $a5,$SIZE_T*6($tp)
+ $LD $a6,$SIZE_T*7($tp)
+ $LD $a7,$SIZE_T*8($tp)
+ addc $acc0,$acc0,$a0
+ $LD $a0,$SIZE_T*1($ap)
+ adde $acc1,$acc1,$a1
+ $LD $a1,$SIZE_T*2($ap)
+ adde $acc2,$acc2,$a2
+ $LD $a2,$SIZE_T*3($ap)
+ adde $acc3,$acc3,$a3
+ $LD $a3,$SIZE_T*4($ap)
+ adde $acc4,$acc4,$a4
+ $LD $a4,$SIZE_T*5($ap)
+ adde $acc5,$acc5,$a5
+ $LD $a5,$SIZE_T*6($ap)
+ adde $acc6,$acc6,$a6
+ $LD $a6,$SIZE_T*7($ap)
+ subi $rp,$ap,$SIZE_T*7
+ addze $acc7,$a7
+ $LDU $a7,$SIZE_T*8($ap)
+ #addze $carry,$zero # moved below
+ li $cnt,0
+ b .Lsqr8x_mul
+
+ # a[8]a[0]
+ # a[9]a[0]
+ # a[a]a[0]
+ # a[b]a[0]
+ # a[c]a[0]
+ # a[d]a[0]
+ # a[e]a[0]
+ # a[f]a[0]
+ # a[8]a[1]
+ # a[f]a[1]........................
+ # a[8]a[2]
+ # a[f]a[2]........................
+ # a[8]a[3]
+ # a[f]a[3]........................
+ # a[8]a[4]
+ # a[f]a[4]........................
+ # a[8]a[5]
+ # a[f]a[5]........................
+ # a[8]a[6]
+ # a[f]a[6]........................
+ # a[8]a[7]
+ # a[f]a[7]........................
+.align 5
+.Lsqr8x_mul:
+ $UMULL $t0,$a0,$n0
+ addze $carry,$zero # carry bit, modulo-scheduled
+ $UMULL $t1,$a1,$n0
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$n0
+ andi. $cnt,$cnt,$SIZE_T*8-1
+ $UMULL $t3,$a3,$n0
+ addc $acc0,$acc0,$t0
+ $UMULL $t0,$a4,$n0
+ adde $acc1,$acc1,$t1
+ $UMULL $t1,$a5,$n0
+ adde $acc2,$acc2,$t2
+ $UMULL $t2,$a6,$n0
+ adde $acc3,$acc3,$t3
+ $UMULL $t3,$a7,$n0
+ adde $acc4,$acc4,$t0
+ $UMULH $t0,$a0,$n0
+ adde $acc5,$acc5,$t1
+ $UMULH $t1,$a1,$n0
+ adde $acc6,$acc6,$t2
+ $UMULH $t2,$a2,$n0
+ adde $acc7,$acc7,$t3
+ $UMULH $t3,$a3,$n0
+ addze $carry,$carry
+ $STU $acc0,$SIZE_T($tp)
+ addc $acc0,$acc1,$t0
+ $UMULH $t0,$a4,$n0
+ adde $acc1,$acc2,$t1
+ $UMULH $t1,$a5,$n0
+ adde $acc2,$acc3,$t2
+ $UMULH $t2,$a6,$n0
+ adde $acc3,$acc4,$t3
+ $UMULH $t3,$a7,$n0
+ $LDX $n0,$rp,$cnt
+ adde $acc4,$acc5,$t0
+ adde $acc5,$acc6,$t1
+ adde $acc6,$acc7,$t2
+ adde $acc7,$carry,$t3
+ #addze $carry,$zero # moved above
+ bne .Lsqr8x_mul
+ # note that carry flag is guaranteed
+ # to be zero at this point
+ $UCMP $ap,$ap_end # done yet?
+ beq .Lsqr8x_break
+
+ $LD $a0,$SIZE_T*1($tp)
+ $LD $a1,$SIZE_T*2($tp)
+ $LD $a2,$SIZE_T*3($tp)
+ $LD $a3,$SIZE_T*4($tp)
+ $LD $a4,$SIZE_T*5($tp)
+ $LD $a5,$SIZE_T*6($tp)
+ $LD $a6,$SIZE_T*7($tp)
+ $LD $a7,$SIZE_T*8($tp)
+ addc $acc0,$acc0,$a0
+ $LD $a0,$SIZE_T*1($ap)
+ adde $acc1,$acc1,$a1
+ $LD $a1,$SIZE_T*2($ap)
+ adde $acc2,$acc2,$a2
+ $LD $a2,$SIZE_T*3($ap)
+ adde $acc3,$acc3,$a3
+ $LD $a3,$SIZE_T*4($ap)
+ adde $acc4,$acc4,$a4
+ $LD $a4,$SIZE_T*5($ap)
+ adde $acc5,$acc5,$a5
+ $LD $a5,$SIZE_T*6($ap)
+ adde $acc6,$acc6,$a6
+ $LD $a6,$SIZE_T*7($ap)
+ adde $acc7,$acc7,$a7
+ $LDU $a7,$SIZE_T*8($ap)
+ #addze $carry,$zero # moved above
+ b .Lsqr8x_mul
+
+.align 5
+.Lsqr8x_break:
+ $LD $a0,$SIZE_T*8($rp)
+ addi $ap,$rp,$SIZE_T*15
+ $LD $a1,$SIZE_T*9($rp)
+ sub. $t0,$ap_end,$ap # is it last iteration?
+ $LD $a2,$SIZE_T*10($rp)
+ sub $t1,$tp,$t0
+ $LD $a3,$SIZE_T*11($rp)
+ $LD $a4,$SIZE_T*12($rp)
+ $LD $a5,$SIZE_T*13($rp)
+ $LD $a6,$SIZE_T*14($rp)
+ $LD $a7,$SIZE_T*15($rp)
+ beq .Lsqr8x_outer_loop
+
+ $ST $acc0,$SIZE_T*1($tp)
+ $LD $acc0,$SIZE_T*1($t1)
+ $ST $acc1,$SIZE_T*2($tp)
+ $LD $acc1,$SIZE_T*2($t1)
+ $ST $acc2,$SIZE_T*3($tp)
+ $LD $acc2,$SIZE_T*3($t1)
+ $ST $acc3,$SIZE_T*4($tp)
+ $LD $acc3,$SIZE_T*4($t1)
+ $ST $acc4,$SIZE_T*5($tp)
+ $LD $acc4,$SIZE_T*5($t1)
+ $ST $acc5,$SIZE_T*6($tp)
+ $LD $acc5,$SIZE_T*6($t1)
+ $ST $acc6,$SIZE_T*7($tp)
+ $LD $acc6,$SIZE_T*7($t1)
+ $ST $acc7,$SIZE_T*8($tp)
+ $LD $acc7,$SIZE_T*8($t1)
+ mr $tp,$t1
+ b .Lsqr8x_outer_loop
+
+.align 5
+.Lsqr8x_outer_break:
+ ####################################################################
+ # Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
+ $LD $a1,$SIZE_T*1($t0) # recall that $t0 is &a[-1]
+ $LD $a3,$SIZE_T*2($t0)
+ $LD $a5,$SIZE_T*3($t0)
+ $LD $a7,$SIZE_T*4($t0)
+ addi $ap,$t0,$SIZE_T*4
+ # "tp[x]" comments are for num==8 case
+ $LD $t1,$SIZE_T*13($sp) # =tp[1], t[0] is not interesting
+ $LD $t2,$SIZE_T*14($sp)
+ $LD $t3,$SIZE_T*15($sp)
+ $LD $t0,$SIZE_T*16($sp)
+
+ $ST $acc0,$SIZE_T*1($tp) # tp[8]=
+ srwi $cnt,$num,`log($SIZE_T)/log(2)+2`
+ $ST $acc1,$SIZE_T*2($tp)
+ subi $cnt,$cnt,1
+ $ST $acc2,$SIZE_T*3($tp)
+ $ST $acc3,$SIZE_T*4($tp)
+ $ST $acc4,$SIZE_T*5($tp)
+ $ST $acc5,$SIZE_T*6($tp)
+ $ST $acc6,$SIZE_T*7($tp)
+ #$ST $acc7,$SIZE_T*8($tp) # tp[15] is not interesting
+ addi $tp,$sp,$SIZE_T*11 # &tp[-1]
+ $UMULL $acc0,$a1,$a1
+ $UMULH $a1,$a1,$a1
+ add $acc1,$t1,$t1 # <<1
+ $SHRI $t1,$t1,$BITS-1
+ $UMULL $a2,$a3,$a3
+ $UMULH $a3,$a3,$a3
+ addc $acc1,$acc1,$a1
+ add $acc2,$t2,$t2
+ $SHRI $t2,$t2,$BITS-1
+ add $acc3,$t3,$t3
+ $SHRI $t3,$t3,$BITS-1
+ or $acc2,$acc2,$t1
+
+ mtctr $cnt
+.Lsqr4x_shift_n_add:
+ $UMULL $a4,$a5,$a5
+ $UMULH $a5,$a5,$a5
+ $LD $t1,$SIZE_T*6($tp) # =tp[5]
+ $LD $a1,$SIZE_T*1($ap)
+ adde $acc2,$acc2,$a2
+ add $acc4,$t0,$t0
+ $SHRI $t0,$t0,$BITS-1
+ or $acc3,$acc3,$t2
+ $LD $t2,$SIZE_T*7($tp) # =tp[6]
+ adde $acc3,$acc3,$a3
+ $LD $a3,$SIZE_T*2($ap)
+ add $acc5,$t1,$t1
+ $SHRI $t1,$t1,$BITS-1
+ or $acc4,$acc4,$t3
+ $LD $t3,$SIZE_T*8($tp) # =tp[7]
+ $UMULL $a6,$a7,$a7
+ $UMULH $a7,$a7,$a7
+ adde $acc4,$acc4,$a4
+ add $acc6,$t2,$t2
+ $SHRI $t2,$t2,$BITS-1
+ or $acc5,$acc5,$t0
+ $LD $t0,$SIZE_T*9($tp) # =tp[8]
+ adde $acc5,$acc5,$a5
+ $LD $a5,$SIZE_T*3($ap)
+ add $acc7,$t3,$t3
+ $SHRI $t3,$t3,$BITS-1
+ or $acc6,$acc6,$t1
+ $LD $t1,$SIZE_T*10($tp) # =tp[9]
+ $UMULL $a0,$a1,$a1
+ $UMULH $a1,$a1,$a1
+ adde $acc6,$acc6,$a6
+ $ST $acc0,$SIZE_T*1($tp) # tp[0]=
+ add $acc0,$t0,$t0
+ $SHRI $t0,$t0,$BITS-1
+ or $acc7,$acc7,$t2
+ $LD $t2,$SIZE_T*11($tp) # =tp[10]
+ adde $acc7,$acc7,$a7
+ $LDU $a7,$SIZE_T*4($ap)
+ $ST $acc1,$SIZE_T*2($tp) # tp[1]=
+ add $acc1,$t1,$t1
+ $SHRI $t1,$t1,$BITS-1
+ or $acc0,$acc0,$t3
+ $LD $t3,$SIZE_T*12($tp) # =tp[11]
+ $UMULL $a2,$a3,$a3
+ $UMULH $a3,$a3,$a3
+ adde $acc0,$acc0,$a0
+ $ST $acc2,$SIZE_T*3($tp) # tp[2]=
+ add $acc2,$t2,$t2
+ $SHRI $t2,$t2,$BITS-1
+ or $acc1,$acc1,$t0
+ $LD $t0,$SIZE_T*13($tp) # =tp[12]
+ adde $acc1,$acc1,$a1
+ $ST $acc3,$SIZE_T*4($tp) # tp[3]=
+ $ST $acc4,$SIZE_T*5($tp) # tp[4]=
+ $ST $acc5,$SIZE_T*6($tp) # tp[5]=
+ $ST $acc6,$SIZE_T*7($tp) # tp[6]=
+ $STU $acc7,$SIZE_T*8($tp) # tp[7]=
+ add $acc3,$t3,$t3
+ $SHRI $t3,$t3,$BITS-1
+ or $acc2,$acc2,$t1
+ bdnz .Lsqr4x_shift_n_add
+___
+my ($np,$np_end)=($ap,$ap_end);
+$code.=<<___;
+ $POP $np,$SIZE_T*7($sp) # pull &np[-1] and n0
+ $POP $n0,$SIZE_T*8($sp)
+
+ $UMULL $a4,$a5,$a5
+ $UMULH $a5,$a5,$a5
+ $ST $acc0,$SIZE_T*1($tp) # tp[8]=
+ $LD $acc0,$SIZE_T*12($sp) # =tp[0]
+ $LD $t1,$SIZE_T*6($tp) # =tp[13]
+ adde $acc2,$acc2,$a2
+ add $acc4,$t0,$t0
+ $SHRI $t0,$t0,$BITS-1
+ or $acc3,$acc3,$t2
+ $LD $t2,$SIZE_T*7($tp) # =tp[14]
+ adde $acc3,$acc3,$a3
+ add $acc5,$t1,$t1
+ $SHRI $t1,$t1,$BITS-1
+ or $acc4,$acc4,$t3
+ $UMULL $a6,$a7,$a7
+ $UMULH $a7,$a7,$a7
+ adde $acc4,$acc4,$a4
+ add $acc6,$t2,$t2
+ $SHRI $t2,$t2,$BITS-1
+ or $acc5,$acc5,$t0
+ $ST $acc1,$SIZE_T*2($tp) # tp[9]=
+ $LD $acc1,$SIZE_T*13($sp) # =tp[1]
+ adde $acc5,$acc5,$a5
+ or $acc6,$acc6,$t1
+ $LD $a0,$SIZE_T*1($np)
+ $LD $a1,$SIZE_T*2($np)
+ adde $acc6,$acc6,$a6
+ $LD $a2,$SIZE_T*3($np)
+ $LD $a3,$SIZE_T*4($np)
+ adde $acc7,$a7,$t2
+ $LD $a4,$SIZE_T*5($np)
+ $LD $a5,$SIZE_T*6($np)
+
+ ################################################################
+ # Reduce by 8 limbs per iteration
+ $UMULL $na0,$n0,$acc0 # t[0]*n0
+ li $cnt,8
+ $LD $a6,$SIZE_T*7($np)
+ add $np_end,$np,$num
+ $LDU $a7,$SIZE_T*8($np)
+ $ST $acc2,$SIZE_T*3($tp) # tp[10]=
+ $LD $acc2,$SIZE_T*14($sp)
+ $ST $acc3,$SIZE_T*4($tp) # tp[11]=
+ $LD $acc3,$SIZE_T*15($sp)
+ $ST $acc4,$SIZE_T*5($tp) # tp[12]=
+ $LD $acc4,$SIZE_T*16($sp)
+ $ST $acc5,$SIZE_T*6($tp) # tp[13]=
+ $LD $acc5,$SIZE_T*17($sp)
+ $ST $acc6,$SIZE_T*7($tp) # tp[14]=
+ $LD $acc6,$SIZE_T*18($sp)
+ $ST $acc7,$SIZE_T*8($tp) # tp[15]=
+ $LD $acc7,$SIZE_T*19($sp)
+ addi $tp,$sp,$SIZE_T*11 # &tp[-1]
+ mtctr $cnt
+ b .Lsqr8x_reduction
+
+.align 5
+.Lsqr8x_reduction:
+ # (*) $UMULL $t0,$a0,$na0 # lo(n[0-7])*lo(t[0]*n0)
+ $UMULL $t1,$a1,$na0
+ $UMULL $t2,$a2,$na0
+ $STU $na0,$SIZE_T($tp) # put aside t[0]*n0 for tail processing
+ $UMULL $t3,$a3,$na0
+ # (*) addc $acc0,$acc0,$t0
+ addic $acc0,$acc0,-1 # (*)
+ $UMULL $t0,$a4,$na0
+ adde $acc0,$acc1,$t1
+ $UMULL $t1,$a5,$na0
+ adde $acc1,$acc2,$t2
+ $UMULL $t2,$a6,$na0
+ adde $acc2,$acc3,$t3
+ $UMULL $t3,$a7,$na0
+ adde $acc3,$acc4,$t0
+ $UMULH $t0,$a0,$na0 # hi(n[0-7])*lo(t[0]*n0)
+ adde $acc4,$acc5,$t1
+ $UMULH $t1,$a1,$na0
+ adde $acc5,$acc6,$t2
+ $UMULH $t2,$a2,$na0
+ adde $acc6,$acc7,$t3
+ $UMULH $t3,$a3,$na0
+ addze $acc7,$zero
+ addc $acc0,$acc0,$t0
+ $UMULH $t0,$a4,$na0
+ adde $acc1,$acc1,$t1
+ $UMULH $t1,$a5,$na0
+ adde $acc2,$acc2,$t2
+ $UMULH $t2,$a6,$na0
+ adde $acc3,$acc3,$t3
+ $UMULH $t3,$a7,$na0
+ $UMULL $na0,$n0,$acc0 # next t[0]*n0
+ adde $acc4,$acc4,$t0
+ adde $acc5,$acc5,$t1
+ adde $acc6,$acc6,$t2
+ adde $acc7,$acc7,$t3
+ bdnz .Lsqr8x_reduction
+
+ $LD $t0,$SIZE_T*1($tp)
+ $LD $t1,$SIZE_T*2($tp)
+ $LD $t2,$SIZE_T*3($tp)
+ $LD $t3,$SIZE_T*4($tp)
+ subi $rp,$tp,$SIZE_T*7
+ $UCMP $np_end,$np # done yet?
+ addc $acc0,$acc0,$t0
+ $LD $t0,$SIZE_T*5($tp)
+ adde $acc1,$acc1,$t1
+ $LD $t1,$SIZE_T*6($tp)
+ adde $acc2,$acc2,$t2
+ $LD $t2,$SIZE_T*7($tp)
+ adde $acc3,$acc3,$t3
+ $LD $t3,$SIZE_T*8($tp)
+ adde $acc4,$acc4,$t0
+ adde $acc5,$acc5,$t1
+ adde $acc6,$acc6,$t2
+ adde $acc7,$acc7,$t3
+ #addze $carry,$zero # moved below
+ beq .Lsqr8x8_post_condition
+
+ $LD $n0,$SIZE_T*0($rp)
+ $LD $a0,$SIZE_T*1($np)
+ $LD $a1,$SIZE_T*2($np)
+ $LD $a2,$SIZE_T*3($np)
+ $LD $a3,$SIZE_T*4($np)
+ $LD $a4,$SIZE_T*5($np)
+ $LD $a5,$SIZE_T*6($np)
+ $LD $a6,$SIZE_T*7($np)
+ $LDU $a7,$SIZE_T*8($np)
+ li $cnt,0
+
+.align 5
+.Lsqr8x_tail:
+ $UMULL $t0,$a0,$n0
+ addze $carry,$zero # carry bit, modulo-scheduled
+ $UMULL $t1,$a1,$n0
+ addi $cnt,$cnt,$SIZE_T
+ $UMULL $t2,$a2,$n0
+ andi. $cnt,$cnt,$SIZE_T*8-1
+ $UMULL $t3,$a3,$n0
+ addc $acc0,$acc0,$t0
+ $UMULL $t0,$a4,$n0
+ adde $acc1,$acc1,$t1
+ $UMULL $t1,$a5,$n0
+ adde $acc2,$acc2,$t2
+ $UMULL $t2,$a6,$n0
+ adde $acc3,$acc3,$t3
+ $UMULL $t3,$a7,$n0
+ adde $acc4,$acc4,$t0
+ $UMULH $t0,$a0,$n0
+ adde $acc5,$acc5,$t1
+ $UMULH $t1,$a1,$n0
+ adde $acc6,$acc6,$t2
+ $UMULH $t2,$a2,$n0
+ adde $acc7,$acc7,$t3
+ $UMULH $t3,$a3,$n0
+ addze $carry,$carry
+ $STU $acc0,$SIZE_T($tp)
+ addc $acc0,$acc1,$t0
+ $UMULH $t0,$a4,$n0
+ adde $acc1,$acc2,$t1
+ $UMULH $t1,$a5,$n0
+ adde $acc2,$acc3,$t2
+ $UMULH $t2,$a6,$n0
+ adde $acc3,$acc4,$t3
+ $UMULH $t3,$a7,$n0
+ $LDX $n0,$rp,$cnt
+ adde $acc4,$acc5,$t0
+ adde $acc5,$acc6,$t1
+ adde $acc6,$acc7,$t2
+ adde $acc7,$carry,$t3
+ #addze $carry,$zero # moved above
+ bne .Lsqr8x_tail
+ # note that carry flag is guaranteed
+ # to be zero at this point
+ $LD $a0,$SIZE_T*1($tp)
+ $POP $carry,$SIZE_T*10($sp) # pull top-most carry in case we break
+ $UCMP $np_end,$np # done yet?
+ $LD $a1,$SIZE_T*2($tp)
+ sub $t2,$np_end,$num # rewound np
+ $LD $a2,$SIZE_T*3($tp)
+ $LD $a3,$SIZE_T*4($tp)
+ $LD $a4,$SIZE_T*5($tp)
+ $LD $a5,$SIZE_T*6($tp)
+ $LD $a6,$SIZE_T*7($tp)
+ $LD $a7,$SIZE_T*8($tp)
+ beq .Lsqr8x_tail_break
+
+ addc $acc0,$acc0,$a0
+ $LD $a0,$SIZE_T*1($np)
+ adde $acc1,$acc1,$a1
+ $LD $a1,$SIZE_T*2($np)
+ adde $acc2,$acc2,$a2
+ $LD $a2,$SIZE_T*3($np)
+ adde $acc3,$acc3,$a3
+ $LD $a3,$SIZE_T*4($np)
+ adde $acc4,$acc4,$a4
+ $LD $a4,$SIZE_T*5($np)
+ adde $acc5,$acc5,$a5
+ $LD $a5,$SIZE_T*6($np)
+ adde $acc6,$acc6,$a6
+ $LD $a6,$SIZE_T*7($np)
+ adde $acc7,$acc7,$a7
+ $LDU $a7,$SIZE_T*8($np)
+ #addze $carry,$zero # moved above
+ b .Lsqr8x_tail
+
+.align 5
+.Lsqr8x_tail_break:
+ $POP $n0,$SIZE_T*8($sp) # pull n0
+ $POP $t3,$SIZE_T*9($sp) # &tp[2*num-1]
+ addi $cnt,$tp,$SIZE_T*8 # end of current t[num] window
+
+ addic $carry,$carry,-1 # "move" top-most carry to carry bit
+ adde $t0,$acc0,$a0
+ $LD $acc0,$SIZE_T*8($rp)
+ $LD $a0,$SIZE_T*1($t2) # recall that $t2 is &n[-1]
+ adde $t1,$acc1,$a1
+ $LD $acc1,$SIZE_T*9($rp)
+ $LD $a1,$SIZE_T*2($t2)
+ adde $acc2,$acc2,$a2
+ $LD $a2,$SIZE_T*3($t2)
+ adde $acc3,$acc3,$a3
+ $LD $a3,$SIZE_T*4($t2)
+ adde $acc4,$acc4,$a4
+ $LD $a4,$SIZE_T*5($t2)
+ adde $acc5,$acc5,$a5
+ $LD $a5,$SIZE_T*6($t2)
+ adde $acc6,$acc6,$a6
+ $LD $a6,$SIZE_T*7($t2)
+ adde $acc7,$acc7,$a7
+ $LD $a7,$SIZE_T*8($t2)
+ addi $np,$t2,$SIZE_T*8
+ addze $t2,$zero # top-most carry
+ $UMULL $na0,$n0,$acc0
+ $ST $t0,$SIZE_T*1($tp)
+ $UCMP $cnt,$t3 # did we hit the bottom?
+ $ST $t1,$SIZE_T*2($tp)
+ li $cnt,8
+ $ST $acc2,$SIZE_T*3($tp)
+ $LD $acc2,$SIZE_T*10($rp)
+ $ST $acc3,$SIZE_T*4($tp)
+ $LD $acc3,$SIZE_T*11($rp)
+ $ST $acc4,$SIZE_T*5($tp)
+ $LD $acc4,$SIZE_T*12($rp)
+ $ST $acc5,$SIZE_T*6($tp)
+ $LD $acc5,$SIZE_T*13($rp)
+ $ST $acc6,$SIZE_T*7($tp)
+ $LD $acc6,$SIZE_T*14($rp)
+ $ST $acc7,$SIZE_T*8($tp)
+ $LD $acc7,$SIZE_T*15($rp)
+ $PUSH $t2,$SIZE_T*10($sp) # off-load top-most carry
+ addi $tp,$rp,$SIZE_T*7 # slide the window
+ mtctr $cnt
+ bne .Lsqr8x_reduction
+
+ ################################################################
+ # Final step. We see if result is larger than modulus, and
+ # if it is, subtract the modulus. But comparison implies
+ # subtraction. So we subtract modulus, see if it borrowed,
+ # and conditionally copy original value.
+ $POP $rp,$SIZE_T*6($sp) # pull &rp[-1]
+ srwi $cnt,$num,`log($SIZE_T)/log(2)+3`
+ mr $n0,$tp # put tp aside
+ addi $tp,$tp,$SIZE_T*8
+ subi $cnt,$cnt,1
+ subfc $t0,$a0,$acc0
+ subfe $t1,$a1,$acc1
+ mr $carry,$t2
+ mr $ap_end,$rp # $rp copy
+
+ mtctr $cnt
+ b .Lsqr8x_sub
+
+.align 5
+.Lsqr8x_sub:
+ $LD $a0,$SIZE_T*1($np)
+ $LD $acc0,$SIZE_T*1($tp)
+ $LD $a1,$SIZE_T*2($np)
+ $LD $acc1,$SIZE_T*2($tp)
+ subfe $t2,$a2,$acc2
+ $LD $a2,$SIZE_T*3($np)
+ $LD $acc2,$SIZE_T*3($tp)
+ subfe $t3,$a3,$acc3
+ $LD $a3,$SIZE_T*4($np)
+ $LD $acc3,$SIZE_T*4($tp)
+ $ST $t0,$SIZE_T*1($rp)
+ subfe $t0,$a4,$acc4
+ $LD $a4,$SIZE_T*5($np)
+ $LD $acc4,$SIZE_T*5($tp)
+ $ST $t1,$SIZE_T*2($rp)
+ subfe $t1,$a5,$acc5
+ $LD $a5,$SIZE_T*6($np)
+ $LD $acc5,$SIZE_T*6($tp)
+ $ST $t2,$SIZE_T*3($rp)
+ subfe $t2,$a6,$acc6
+ $LD $a6,$SIZE_T*7($np)
+ $LD $acc6,$SIZE_T*7($tp)
+ $ST $t3,$SIZE_T*4($rp)
+ subfe $t3,$a7,$acc7
+ $LDU $a7,$SIZE_T*8($np)
+ $LDU $acc7,$SIZE_T*8($tp)
+ $ST $t0,$SIZE_T*5($rp)
+ subfe $t0,$a0,$acc0
+ $ST $t1,$SIZE_T*6($rp)
+ subfe $t1,$a1,$acc1
+ $ST $t2,$SIZE_T*7($rp)
+ $STU $t3,$SIZE_T*8($rp)
+ bdnz .Lsqr8x_sub
+
+ srwi $cnt,$num,`log($SIZE_T)/log(2)+2`
+ $LD $a0,$SIZE_T*1($ap_end) # original $rp
+ $LD $acc0,$SIZE_T*1($n0) # original $tp
+ subi $cnt,$cnt,1
+ $LD $a1,$SIZE_T*2($ap_end)
+ $LD $acc1,$SIZE_T*2($n0)
+ subfe $t2,$a2,$acc2
+ $LD $a2,$SIZE_T*3($ap_end)
+ $LD $acc2,$SIZE_T*3($n0)
+ subfe $t3,$a3,$acc3
+ $LD $a3,$SIZE_T*4($ap_end)
+ $LDU $acc3,$SIZE_T*4($n0)
+ $ST $t0,$SIZE_T*1($rp)
+ subfe $t0,$a4,$acc4
+ $ST $t1,$SIZE_T*2($rp)
+ subfe $t1,$a5,$acc5
+ $ST $t2,$SIZE_T*3($rp)
+ subfe $t2,$a6,$acc6
+ $ST $t3,$SIZE_T*4($rp)
+ subfe $t3,$a7,$acc7
+ $ST $t0,$SIZE_T*5($rp)
+ subfe $carry,$zero,$carry # did it borrow?
+ $ST $t1,$SIZE_T*6($rp)
+ $ST $t2,$SIZE_T*7($rp)
+ $ST $t3,$SIZE_T*8($rp)
+
+ addi $tp,$sp,$SIZE_T*11
+ mtctr $cnt
+
+.Lsqr4x_cond_copy:
+ andc $a0,$a0,$carry
+ $ST $zero,-$SIZE_T*3($n0) # wipe stack clean
+ and $acc0,$acc0,$carry
+ $ST $zero,-$SIZE_T*2($n0)
+ andc $a1,$a1,$carry
+ $ST $zero,-$SIZE_T*1($n0)
+ and $acc1,$acc1,$carry
+ $ST $zero,-$SIZE_T*0($n0)
+ andc $a2,$a2,$carry
+ $ST $zero,$SIZE_T*1($tp)
+ and $acc2,$acc2,$carry
+ $ST $zero,$SIZE_T*2($tp)
+ andc $a3,$a3,$carry
+ $ST $zero,$SIZE_T*3($tp)
+ and $acc3,$acc3,$carry
+ $STU $zero,$SIZE_T*4($tp)
+ or $t0,$a0,$acc0
+ $LD $a0,$SIZE_T*5($ap_end)
+ $LD $acc0,$SIZE_T*1($n0)
+ or $t1,$a1,$acc1
+ $LD $a1,$SIZE_T*6($ap_end)
+ $LD $acc1,$SIZE_T*2($n0)
+ or $t2,$a2,$acc2
+ $LD $a2,$SIZE_T*7($ap_end)
+ $LD $acc2,$SIZE_T*3($n0)
+ or $t3,$a3,$acc3
+ $LD $a3,$SIZE_T*8($ap_end)
+ $LDU $acc3,$SIZE_T*4($n0)
+ $ST $t0,$SIZE_T*1($ap_end)
+ $ST $t1,$SIZE_T*2($ap_end)
+ $ST $t2,$SIZE_T*3($ap_end)
+ $STU $t3,$SIZE_T*4($ap_end)
+ bdnz .Lsqr4x_cond_copy
+
+ $POP $ap,0($sp) # pull saved sp
+ andc $a0,$a0,$carry
+ and $acc0,$acc0,$carry
+ andc $a1,$a1,$carry
+ and $acc1,$acc1,$carry
+ andc $a2,$a2,$carry
+ and $acc2,$acc2,$carry
+ andc $a3,$a3,$carry
+ and $acc3,$acc3,$carry
+ or $t0,$a0,$acc0
+ or $t1,$a1,$acc1
+ or $t2,$a2,$acc2
+ or $t3,$a3,$acc3
+ $ST $t0,$SIZE_T*1($ap_end)
+ $ST $t1,$SIZE_T*2($ap_end)
+ $ST $t2,$SIZE_T*3($ap_end)
+ $ST $t3,$SIZE_T*4($ap_end)
+
+ b .Lsqr8x_done
+
+.align 5
+.Lsqr8x8_post_condition:
+ $POP $rp,$SIZE_T*6($sp) # pull rp
+ $POP $ap,0($sp) # pull saved sp
+ addze $carry,$zero
+
+ # $acc0-7,$carry hold result, $a0-7 hold modulus
+ subfc $acc0,$a0,$acc0
+ subfe $acc1,$a1,$acc1
+ $ST $zero,$SIZE_T*12($sp) # wipe stack clean
+ $ST $zero,$SIZE_T*13($sp)
+ subfe $acc2,$a2,$acc2
+ $ST $zero,$SIZE_T*14($sp)
+ $ST $zero,$SIZE_T*15($sp)
+ subfe $acc3,$a3,$acc3
+ $ST $zero,$SIZE_T*16($sp)
+ $ST $zero,$SIZE_T*17($sp)
+ subfe $acc4,$a4,$acc4
+ $ST $zero,$SIZE_T*18($sp)
+ $ST $zero,$SIZE_T*19($sp)
+ subfe $acc5,$a5,$acc5
+ $ST $zero,$SIZE_T*20($sp)
+ $ST $zero,$SIZE_T*21($sp)
+ subfe $acc6,$a6,$acc6
+ $ST $zero,$SIZE_T*22($sp)
+ $ST $zero,$SIZE_T*23($sp)
+ subfe $acc7,$a7,$acc7
+ $ST $zero,$SIZE_T*24($sp)
+ $ST $zero,$SIZE_T*25($sp)
+ subfe $carry,$zero,$carry # did it borrow?
+ $ST $zero,$SIZE_T*26($sp)
+ $ST $zero,$SIZE_T*27($sp)
+
+ and $a0,$a0,$carry
+ and $a1,$a1,$carry
+ addc $acc0,$acc0,$a0 # add modulus back if borrowed
+ and $a2,$a2,$carry
+ adde $acc1,$acc1,$a1
+ and $a3,$a3,$carry
+ adde $acc2,$acc2,$a2
+ and $a4,$a4,$carry
+ adde $acc3,$acc3,$a3
+ and $a5,$a5,$carry
+ adde $acc4,$acc4,$a4
+ and $a6,$a6,$carry
+ adde $acc5,$acc5,$a5
+ and $a7,$a7,$carry
+ adde $acc6,$acc6,$a6
+ adde $acc7,$acc7,$a7
+ $ST $acc0,$SIZE_T*1($rp)
+ $ST $acc1,$SIZE_T*2($rp)
+ $ST $acc2,$SIZE_T*3($rp)
+ $ST $acc3,$SIZE_T*4($rp)
+ $ST $acc4,$SIZE_T*5($rp)
+ $ST $acc5,$SIZE_T*6($rp)
+ $ST $acc6,$SIZE_T*7($rp)
+ $ST $acc7,$SIZE_T*8($rp)
+
+.Lsqr8x_done:
+ $PUSH $zero,$SIZE_T*8($sp)
+ $PUSH $zero,$SIZE_T*10($sp)
+
+ $POP r14,-$SIZE_T*18($ap)
+ li r3,1 # signal "done"
+ $POP r15,-$SIZE_T*17($ap)
+ $POP r16,-$SIZE_T*16($ap)
+ $POP r17,-$SIZE_T*15($ap)
+ $POP r18,-$SIZE_T*14($ap)
+ $POP r19,-$SIZE_T*13($ap)
+ $POP r20,-$SIZE_T*12($ap)
+ $POP r21,-$SIZE_T*11($ap)
+ $POP r22,-$SIZE_T*10($ap)
+ $POP r23,-$SIZE_T*9($ap)
+ $POP r24,-$SIZE_T*8($ap)
+ $POP r25,-$SIZE_T*7($ap)
+ $POP r26,-$SIZE_T*6($ap)
+ $POP r27,-$SIZE_T*5($ap)
+ $POP r28,-$SIZE_T*4($ap)
+ $POP r29,-$SIZE_T*3($ap)
+ $POP r30,-$SIZE_T*2($ap)
+ $POP r31,-$SIZE_T*1($ap)
+ mr $sp,$ap
+ blr
+ .long 0
+ .byte 0,12,4,0x20,0x80,18,6,0
+ .long 0
+.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
+___
+}
+$code.=<<___;
.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
___
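
Editorial aside, not part of the patch: the __bn_sqr8x_mont code added above is
easier to follow next to a plain C rendition of the two ideas its comments call
out, namely Montgomery reduction of the double-width square one limb of t[0]*n0
at a time ("Reduce by 8 limbs per iteration" does eight of these per loop), and
the branch-free final step ("comparison implies subtraction ... conditionally
copy original value", the andc/and/or pattern of .Lsqr4x_cond_copy).  The names
limb and montgomery_reduce and the use of unsigned __int128 are assumptions of
this sketch; the real code works on BN_ULONG limbs in PPC assembly.

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t limb;                      /* stands in for BN_ULONG */

    /*
     * Reduce the 2*num-limb value t[] modulo n[], writing the num-limb
     * result to r[].  n0 is -n[0]^-1 mod 2^64.
     */
    static void montgomery_reduce(limb *r, limb *t, const limb *n,
                                  limb n0, size_t num)
    {
        limb carry = 0;                         /* "top-most carry" in the asm */

        for (size_t i = 0; i < num; i++) {
            limb m = t[i] * n0;                 /* the t[0]*n0 of the comments */
            limb c = 0;

            for (size_t j = 0; j < num; j++) {  /* t[i..i+num-1] += m * n[]    */
                unsigned __int128 acc =
                    (unsigned __int128)m * n[j] + t[i + j] + c;
                t[i + j] = (limb)acc;
                c = (limb)(acc >> 64);
            }
            unsigned __int128 acc =
                (unsigned __int128)t[i + num] + c + carry;
            t[i + num] = (limb)acc;
            carry = (limb)(acc >> 64);          /* carried into the next limb  */
        }

        /* Subtract the modulus unconditionally and remember whether that
         * borrowed; "comparison implies subtraction". */
        limb borrow = 0;
        for (size_t i = 0; i < num; i++) {
            unsigned __int128 d =
                (unsigned __int128)t[num + i] - n[i] - borrow;
            r[i] = (limb)d;
            borrow = (limb)(d >> 64) & 1;       /* 1 if the subtraction borrowed */
        }

        /* Keep the unsubtracted value only if it borrowed and there is no
         * top-most carry; the selection uses masks, not branches. */
        limb keep = (limb)0 - (limb)(borrow & (carry ^ 1));
        for (size_t i = 0; i < num; i++)
            r[i] = (t[num + i] & keep) | (r[i] & ~keep);
    }

The mask-based copy is what keeps the tail constant-time: both candidate values
are always computed and the choice between them never depends on a branch.
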
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc.pl b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
index 4ea534a1c7..e37068192f 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -38,9 +38,9 @@
#rsa 2048 bits 0.3036s 0.0085s 3.3 117.1
#rsa 4096 bits 2.0040s 0.0299s 0.5 33.4
#dsa 512 bits 0.0087s 0.0106s 114.3 94.5
-#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
+#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0
#
-# Same bechmark with this assembler code:
+# Same benchmark with this assembler code:
#
#rsa 512 bits 0.0056s 0.0005s 178.6 2049.2
#rsa 1024 bits 0.0283s 0.0015s 35.3 674.1
@@ -74,7 +74,7 @@
#rsa 4096 bits 0.3700s 0.0058s 2.7 171.0
#dsa 512 bits 0.0016s 0.0020s 610.7 507.1
#dsa 1024 bits 0.0047s 0.0058s 212.5 173.2
-#
+#
# Again, performance increases by at about 75%
#
# Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
@@ -101,10 +101,7 @@
#dsa 2048 bits 0.0061s 0.0075s 163.5 132.8
#
# Performance increase of ~60%
-#
-# If you have comments or suggestions to improve code send
-# me a note at schari@us.ibm.com
-#
+# Based on submission from Suresh N. Chari of IBM
$flavour = shift;
@@ -125,7 +122,7 @@ if ($flavour =~ /32/) {
$CNTLZ= "cntlzw"; # count leading zeros
$SHL= "slw"; # shift left
$SHR= "srw"; # unsigned shift right
- $SHRI= "srwi"; # unsigned shift right by immediate
+ $SHRI= "srwi"; # unsigned shift right by immediate
$SHLI= "slwi"; # shift left by immediate
$CLRU= "clrlwi"; # clear upper bits
$INSR= "insrwi"; # insert right
@@ -149,10 +146,10 @@ if ($flavour =~ /32/) {
$CNTLZ= "cntlzd"; # count leading zeros
$SHL= "sld"; # shift left
$SHR= "srd"; # unsigned shift right
- $SHRI= "srdi"; # unsigned shift right by immediate
+ $SHRI= "srdi"; # unsigned shift right by immediate
$SHLI= "sldi"; # shift left by immediate
$CLRU= "clrldi"; # clear upper bits
- $INSR= "insrdi"; # insert right
+ $INSR= "insrdi"; # insert right
$ROTL= "rotldi"; # rotate left by immediate
$TR= "td"; # conditional trap
} else { die "nonsense $flavour"; }
@@ -189,7 +186,7 @@ $data=<<EOF;
# below.
# 12/05/03 Suresh Chari
# (with lots of help from) Andy Polyakov
-##
+##
# 1. Initial version 10/20/02 Suresh Chari
#
#
@@ -202,7 +199,7 @@ $data=<<EOF;
# be done in the build process.
#
# Hand optimized assembly code for the following routines
-#
+#
# bn_sqr_comba4
# bn_sqr_comba8
# bn_mul_comba4
@@ -225,10 +222,10 @@ $data=<<EOF;
#--------------------------------------------------------------------------
#
# Defines to be used in the assembly code.
-#
+#
#.set r0,0 # we use it as storage for value of 0
#.set SP,1 # preserved
-#.set RTOC,2 # preserved
+#.set RTOC,2 # preserved
#.set r3,3 # 1st argument/return value
#.set r4,4 # 2nd argument/volatile register
#.set r5,5 # 3rd argument/volatile register
@@ -246,7 +243,7 @@ $data=<<EOF;
# the first . i.e. for example change ".bn_sqr_comba4"
# to "bn_sqr_comba4". This should be automatically done
# in the build.
-
+
.globl .bn_sqr_comba4
.globl .bn_sqr_comba8
.globl .bn_mul_comba4
@@ -257,9 +254,9 @@ $data=<<EOF;
.globl .bn_sqr_words
.globl .bn_mul_words
.globl .bn_mul_add_words
-
+
# .text section
-
+
.machine "any"
#
@@ -278,8 +275,8 @@ $data=<<EOF;
# r3 contains r
# r4 contains a
#
-# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
-#
+# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
+#
# r5,r6 are the two BN_ULONGs being multiplied.
# r7,r8 are the results of the 32x32 giving 64 bit multiply.
# r9,r10, r11 are the equivalents of c1,c2, c3.
@@ -288,10 +285,10 @@ $data=<<EOF;
#
xor r0,r0,r0 # set r0 = 0. Used in the addze
# instructions below
-
+
#sqr_add_c(a,0,c1,c2,c3)
- $LD r5,`0*$BNSZ`(r4)
- $UMULL r9,r5,r5
+ $LD r5,`0*$BNSZ`(r4)
+ $UMULL r9,r5,r5
$UMULH r10,r5,r5 #in first iteration. No need
#to add since c1=c2=c3=0.
# Note c3(r11) is NOT set to 0
@@ -299,20 +296,20 @@ $data=<<EOF;
$ST r9,`0*$BNSZ`(r3) # r[0]=c1;
# sqr_add_c2(a,1,0,c2,c3,c1);
- $LD r6,`1*$BNSZ`(r4)
+ $LD r6,`1*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8)
adde r8,r8,r8
addze r9,r0 # catch carry if any.
- # r9= r0(=0) and carry
-
+ # r9= r0(=0) and carry
+
addc r10,r7,r10 # now add to temp result.
- addze r11,r8 # r8 added to r11 which is 0
+ addze r11,r8 # r8 added to r11 which is 0
addze r9,r9
-
- $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
+
+ $ST r10,`1*$BNSZ`(r3) #r[1]=c2;
#sqr_add_c(a,1,c3,c1,c2)
$UMULL r7,r6,r6
$UMULH r8,r6,r6
@@ -323,23 +320,23 @@ $data=<<EOF;
$LD r6,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r7,r7,r7
adde r8,r8,r8
addze r10,r10
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
- $ST r11,`2*$BNSZ`(r3) #r[2]=c3
+ $ST r11,`2*$BNSZ`(r3) #r[2]=c3
#sqr_add_c2(a,3,0,c1,c2,c3);
- $LD r6,`3*$BNSZ`(r4)
+ $LD r6,`3*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
addc r7,r7,r7
adde r8,r8,r8
addze r11,r0
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
@@ -348,7 +345,7 @@ $data=<<EOF;
$LD r6,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r7,r7,r7
adde r8,r8,r8
addze r11,r11
@@ -363,31 +360,31 @@ $data=<<EOF;
adde r11,r8,r11
addze r9,r0
#sqr_add_c2(a,3,1,c2,c3,c1);
- $LD r6,`3*$BNSZ`(r4)
+ $LD r6,`3*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
addc r7,r7,r7
adde r8,r8,r8
addze r9,r9
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
$ST r10,`4*$BNSZ`(r3) #r[4]=c2
#sqr_add_c2(a,3,2,c3,c1,c2);
- $LD r5,`2*$BNSZ`(r4)
+ $LD r5,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
addc r7,r7,r7
adde r8,r8,r8
addze r10,r0
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
$ST r11,`5*$BNSZ`(r3) #r[5] = c3
#sqr_add_c(a,3,c1,c2,c3);
- $UMULL r7,r6,r6
+ $UMULL r7,r6,r6
$UMULH r8,r6,r6
addc r9,r7,r9
adde r10,r8,r10
@@ -406,7 +403,7 @@ $data=<<EOF;
# for the gcc compiler. This should be automatically
# done in the build
#
-
+
.align 4
.bn_sqr_comba8:
#
@@ -418,15 +415,15 @@ $data=<<EOF;
# r3 contains r
# r4 contains a
#
-# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
-#
+# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
+#
# r5,r6 are the two BN_ULONGs being multiplied.
# r7,r8 are the results of the 32x32 giving 64 bit multiply.
# r9,r10, r11 are the equivalents of c1,c2, c3.
#
# Possible optimization of loading all 8 longs of a into registers
# doesn't provide any speedup
-#
+#
xor r0,r0,r0 #set r0 = 0.Used in addze
#instructions below.
@@ -439,18 +436,18 @@ $data=<<EOF;
#sqr_add_c2(a,1,0,c2,c3,c1);
$LD r6,`1*$BNSZ`(r4)
$UMULL r7,r5,r6
- $UMULH r8,r5,r6
-
+ $UMULH r8,r5,r6
+
addc r10,r7,r10 #add the two register number
adde r11,r8,r0 # (r8,r7) to the three register
addze r9,r0 # number (r9,r11,r10).NOTE:r0=0
-
+
addc r10,r7,r10 #add the two register number
adde r11,r8,r11 # (r8,r7) to the three register
addze r9,r9 # number (r9,r11,r10).
-
+
$ST r10,`1*$BNSZ`(r3) # r[1]=c2
-
+
#sqr_add_c(a,1,c3,c1,c2);
$UMULL r7,r6,r6
$UMULH r8,r6,r6
@@ -461,25 +458,25 @@ $data=<<EOF;
$LD r6,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
-
+
$ST r11,`2*$BNSZ`(r3) #r[2]=c3
#sqr_add_c2(a,3,0,c1,c2,c3);
$LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0].
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r0
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
@@ -488,20 +485,20 @@ $data=<<EOF;
$LD r6,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
-
+
$ST r9,`3*$BNSZ`(r3) #r[3]=c1;
#sqr_add_c(a,2,c2,c3,c1);
$UMULL r7,r6,r6
$UMULH r8,r6,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r0
@@ -509,11 +506,11 @@ $data=<<EOF;
$LD r6,`3*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
@@ -522,11 +519,11 @@ $data=<<EOF;
$LD r6,`4*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
@@ -535,11 +532,11 @@ $data=<<EOF;
$LD r6,`5*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r0
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
@@ -548,11 +545,11 @@ $data=<<EOF;
$LD r6,`4*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
@@ -561,11 +558,11 @@ $data=<<EOF;
$LD r6,`3*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
@@ -580,11 +577,11 @@ $data=<<EOF;
$LD r6,`4*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
@@ -593,11 +590,11 @@ $data=<<EOF;
$LD r6,`5*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r11
@@ -617,7 +614,7 @@ $data=<<EOF;
$LD r6,`7*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r0
@@ -629,7 +626,7 @@ $data=<<EOF;
$LD r6,`6*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
@@ -652,7 +649,7 @@ $data=<<EOF;
$LD r6,`4*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r10,r7,r10
adde r11,r8,r11
addze r9,r9
@@ -684,7 +681,7 @@ $data=<<EOF;
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
-
+
addc r11,r7,r11
adde r9,r8,r9
addze r10,r10
@@ -704,7 +701,7 @@ $data=<<EOF;
$LD r5,`2*$BNSZ`(r4)
$UMULL r7,r5,r6
$UMULH r8,r5,r6
-
+
addc r9,r7,r9
adde r10,r8,r10
addze r11,r0
@@ -801,7 +798,7 @@ $data=<<EOF;
adde r10,r8,r10
addze r11,r11
$ST r9,`12*$BNSZ`(r3) #r[12]=c1;
-
+
#sqr_add_c2(a,7,6,c2,c3,c1)
$LD r5,`6*$BNSZ`(r4)
$UMULL r7,r5,r6
@@ -850,21 +847,21 @@ $data=<<EOF;
#
xor r0,r0,r0 #r0=0. Used in addze below.
#mul_add_c(a[0],b[0],c1,c2,c3);
- $LD r6,`0*$BNSZ`(r4)
- $LD r7,`0*$BNSZ`(r5)
- $UMULL r10,r6,r7
- $UMULH r11,r6,r7
+ $LD r6,`0*$BNSZ`(r4)
+ $LD r7,`0*$BNSZ`(r5)
+ $UMULL r10,r6,r7
+ $UMULH r11,r6,r7
$ST r10,`0*$BNSZ`(r3) #r[0]=c1
#mul_add_c(a[0],b[1],c2,c3,c1);
- $LD r7,`1*$BNSZ`(r5)
+ $LD r7,`1*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r11,r8,r11
adde r12,r9,r0
addze r10,r0
#mul_add_c(a[1],b[0],c2,c3,c1);
- $LD r6, `1*$BNSZ`(r4)
- $LD r7, `0*$BNSZ`(r5)
+ $LD r6, `1*$BNSZ`(r4)
+ $LD r7, `0*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r11,r8,r11
@@ -872,23 +869,23 @@ $data=<<EOF;
addze r10,r10
$ST r11,`1*$BNSZ`(r3) #r[1]=c2
#mul_add_c(a[2],b[0],c3,c1,c2);
- $LD r6,`2*$BNSZ`(r4)
+ $LD r6,`2*$BNSZ`(r4)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r12,r8,r12
adde r10,r9,r10
addze r11,r0
#mul_add_c(a[1],b[1],c3,c1,c2);
- $LD r6,`1*$BNSZ`(r4)
- $LD r7,`1*$BNSZ`(r5)
+ $LD r6,`1*$BNSZ`(r4)
+ $LD r7,`1*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r12,r8,r12
adde r10,r9,r10
addze r11,r11
#mul_add_c(a[0],b[2],c3,c1,c2);
- $LD r6,`0*$BNSZ`(r4)
- $LD r7,`2*$BNSZ`(r5)
+ $LD r6,`0*$BNSZ`(r4)
+ $LD r7,`2*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r12,r8,r12
@@ -896,7 +893,7 @@ $data=<<EOF;
addze r11,r11
$ST r12,`2*$BNSZ`(r3) #r[2]=c3
#mul_add_c(a[0],b[3],c1,c2,c3);
- $LD r7,`3*$BNSZ`(r5)
+ $LD r7,`3*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r10,r8,r10
@@ -928,7 +925,7 @@ $data=<<EOF;
addze r12,r12
$ST r10,`3*$BNSZ`(r3) #r[3]=c1
#mul_add_c(a[3],b[1],c2,c3,c1);
- $LD r7,`1*$BNSZ`(r5)
+ $LD r7,`1*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r11,r8,r11
@@ -952,7 +949,7 @@ $data=<<EOF;
addze r10,r10
$ST r11,`4*$BNSZ`(r3) #r[4]=c2
#mul_add_c(a[2],b[3],c3,c1,c2);
- $LD r6,`2*$BNSZ`(r4)
+ $LD r6,`2*$BNSZ`(r4)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r12,r8,r12
@@ -968,7 +965,7 @@ $data=<<EOF;
addze r11,r11
$ST r12,`5*$BNSZ`(r3) #r[5]=c3
#mul_add_c(a[3],b[3],c1,c2,c3);
- $LD r7,`3*$BNSZ`(r5)
+ $LD r7,`3*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r10,r8,r10
@@ -988,7 +985,7 @@ $data=<<EOF;
# for the gcc compiler. This should be automatically
# done in the build
#
-
+
.align 4
.bn_mul_comba8:
#
@@ -1003,7 +1000,7 @@ $data=<<EOF;
# r10, r11, r12 are the equivalents of c1, c2, and c3.
#
xor r0,r0,r0 #r0=0. Used in addze below.
-
+
#mul_add_c(a[0],b[0],c1,c2,c3);
$LD r6,`0*$BNSZ`(r4) #a[0]
$LD r7,`0*$BNSZ`(r5) #b[0]
@@ -1065,7 +1062,7 @@ $data=<<EOF;
addc r10,r10,r8
adde r11,r11,r9
addze r12,r12
-
+
#mul_add_c(a[2],b[1],c1,c2,c3);
$LD r6,`2*$BNSZ`(r4)
$LD r7,`1*$BNSZ`(r5)
@@ -1131,7 +1128,7 @@ $data=<<EOF;
adde r10,r10,r9
addze r11,r0
#mul_add_c(a[1],b[4],c3,c1,c2);
- $LD r6,`1*$BNSZ`(r4)
+ $LD r6,`1*$BNSZ`(r4)
$LD r7,`4*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
@@ -1139,7 +1136,7 @@ $data=<<EOF;
adde r10,r10,r9
addze r11,r11
#mul_add_c(a[2],b[3],c3,c1,c2);
- $LD r6,`2*$BNSZ`(r4)
+ $LD r6,`2*$BNSZ`(r4)
$LD r7,`3*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
@@ -1147,7 +1144,7 @@ $data=<<EOF;
adde r10,r10,r9
addze r11,r11
#mul_add_c(a[3],b[2],c3,c1,c2);
- $LD r6,`3*$BNSZ`(r4)
+ $LD r6,`3*$BNSZ`(r4)
$LD r7,`2*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
@@ -1155,7 +1152,7 @@ $data=<<EOF;
adde r10,r10,r9
addze r11,r11
#mul_add_c(a[4],b[1],c3,c1,c2);
- $LD r6,`4*$BNSZ`(r4)
+ $LD r6,`4*$BNSZ`(r4)
$LD r7,`1*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
@@ -1163,7 +1160,7 @@ $data=<<EOF;
adde r10,r10,r9
addze r11,r11
#mul_add_c(a[5],b[0],c3,c1,c2);
- $LD r6,`5*$BNSZ`(r4)
+ $LD r6,`5*$BNSZ`(r4)
$LD r7,`0*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
@@ -1555,7 +1552,7 @@ $data=<<EOF;
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
mtctr r6
-Lppcasm_sub_mainloop:
+Lppcasm_sub_mainloop:
$LDU r7,$BNSZ(r4)
$LDU r8,$BNSZ(r5)
subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8)
@@ -1563,7 +1560,7 @@ Lppcasm_sub_mainloop:
# is r7-r8 -1 as we need.
$STU r6,$BNSZ(r3)
bdnz Lppcasm_sub_mainloop
-Lppcasm_sub_adios:
+Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
blr
@@ -1604,13 +1601,13 @@ Lppcasm_sub_adios:
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
mtctr r6
-Lppcasm_add_mainloop:
+Lppcasm_add_mainloop:
$LDU r7,$BNSZ(r4)
$LDU r8,$BNSZ(r5)
adde r8,r7,r8
$STU r8,$BNSZ(r3)
bdnz Lppcasm_add_mainloop
-Lppcasm_add_adios:
+Lppcasm_add_adios:
addze r3,r0 #return carry bit.
blr
.long 0
@@ -1633,11 +1630,11 @@ Lppcasm_add_adios:
# the PPC instruction to count leading zeros instead
# of call to num_bits_word. Since this was compiled
# only at level -O2 we can possibly squeeze it more?
-#
+#
# r3 = h
# r4 = l
# r5 = d
-
+
$UCMPI 0,r5,0 # compare r5 and 0
bne Lppcasm_div1 # proceed if d!=0
li r3,-1 # d=0 return -1
@@ -1653,7 +1650,7 @@ Lppcasm_div1:
Lppcasm_div2:
$UCMP 0,r3,r5 #h>=d?
blt Lppcasm_div3 #goto Lppcasm_div3 if not
- subf r3,r5,r3 #h-=d ;
+ subf r3,r5,r3 #h-=d ;
Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
cmpi 0,0,r7,0 # is (i == 0)?
beq Lppcasm_div4
@@ -1668,7 +1665,7 @@ Lppcasm_div4:
# as it saves registers.
li r6,2 #r6=2
mtctr r6 #counter will be in count.
-Lppcasm_divouterloop:
+Lppcasm_divouterloop:
$SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4)
$SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
# compute here for innerloop.
@@ -1676,7 +1673,7 @@ Lppcasm_divouterloop:
bne Lppcasm_div5 # goto Lppcasm_div5 if not
li r8,-1
- $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
+ $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
b Lppcasm_div6
Lppcasm_div5:
$UDIV r8,r3,r9 #q = h/dh
@@ -1684,7 +1681,7 @@ Lppcasm_div6:
$UMULL r12,r9,r8 #th = q*dh
$CLRU r10,r5,`$BITS/2` #r10=dl
$UMULL r6,r8,r10 #tl = q*dl
-
+
Lppcasm_divinnerloop:
subf r10,r12,r3 #t = h -th
$SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of...
@@ -1761,7 +1758,7 @@ Lppcasm_div9:
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
mtctr r5
-Lppcasm_sqr_mainloop:
+Lppcasm_sqr_mainloop:
#sqr(r[0],r[1],a[0]);
$LDU r6,$BNSZ(r4)
$UMULL r7,r6,r6
@@ -1769,7 +1766,7 @@ Lppcasm_sqr_mainloop:
$STU r7,$BNSZ(r3)
$STU r8,$BNSZ(r3)
bdnz Lppcasm_sqr_mainloop
-Lppcasm_sqr_adios:
+Lppcasm_sqr_adios:
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
@@ -1783,7 +1780,7 @@ Lppcasm_sqr_adios:
# done in the build
#
-.align 4
+.align 4
.bn_mul_words:
#
# BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
@@ -1797,7 +1794,7 @@ Lppcasm_sqr_adios:
rlwinm. r7,r5,30,2,31 # num >> 2
beq Lppcasm_mw_REM
mtctr r7
-Lppcasm_mw_LOOP:
+Lppcasm_mw_LOOP:
#mul(rp[0],ap[0],w,c1);
$LD r8,`0*$BNSZ`(r4)
$UMULL r9,r6,r8
@@ -1809,7 +1806,7 @@ Lppcasm_mw_LOOP:
#using adde.
$ST r9,`0*$BNSZ`(r3)
#mul(rp[1],ap[1],w,c1);
- $LD r8,`1*$BNSZ`(r4)
+ $LD r8,`1*$BNSZ`(r4)
$UMULL r11,r6,r8
$UMULH r12,r6,r8
adde r11,r11,r10
@@ -1830,7 +1827,7 @@ Lppcasm_mw_LOOP:
addze r12,r12 #this spin we collect carry into
#r12
$ST r11,`3*$BNSZ`(r3)
-
+
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
bdnz Lppcasm_mw_LOOP
@@ -1846,25 +1843,25 @@ Lppcasm_mw_REM:
addze r10,r10
$ST r9,`0*$BNSZ`(r3)
addi r12,r10,0
-
+
addi r5,r5,-1
cmpli 0,0,r5,0
beq Lppcasm_mw_OVER
-
+
#mul(rp[1],ap[1],w,c1);
- $LD r8,`1*$BNSZ`(r4)
+ $LD r8,`1*$BNSZ`(r4)
$UMULL r9,r6,r8
$UMULH r10,r6,r8
addc r9,r9,r12
addze r10,r10
$ST r9,`1*$BNSZ`(r3)
addi r12,r10,0
-
+
addi r5,r5,-1
cmpli 0,0,r5,0
beq Lppcasm_mw_OVER
-
+
#mul_add(rp[2],ap[2],w,c1);
$LD r8,`2*$BNSZ`(r4)
$UMULL r9,r6,r8
@@ -1873,14 +1870,14 @@ Lppcasm_mw_REM:
addze r10,r10
$ST r9,`2*$BNSZ`(r3)
addi r12,r10,0
-
-Lppcasm_mw_OVER:
+
+Lppcasm_mw_OVER:
addi r3,r12,0
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
-.size bn_mul_words,.-bn_mul_words
+.size .bn_mul_words,.-.bn_mul_words
#
# NOTE: The following label name should be changed to
@@ -1902,11 +1899,11 @@ Lppcasm_mw_OVER:
# empirical evidence suggests that unrolled version performs best!!
#
xor r0,r0,r0 #r0 = 0
- xor r12,r12,r12 #r12 = 0 . used for carry
+ xor r12,r12,r12 #r12 = 0 . used for carry
rlwinm. r7,r5,30,2,31 # num >> 2
beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
mtctr r7
-Lppcasm_maw_mainloop:
+Lppcasm_maw_mainloop:
#mul_add(rp[0],ap[0],w,c1);
$LD r8,`0*$BNSZ`(r4)
$LD r11,`0*$BNSZ`(r3)
@@ -1922,9 +1919,9 @@ Lppcasm_maw_mainloop:
#by multiply and will be collected
#in the next spin
$ST r9,`0*$BNSZ`(r3)
-
+
#mul_add(rp[1],ap[1],w,c1);
- $LD r8,`1*$BNSZ`(r4)
+ $LD r8,`1*$BNSZ`(r4)
$LD r9,`1*$BNSZ`(r3)
$UMULL r11,r6,r8
$UMULH r12,r6,r8
@@ -1933,7 +1930,7 @@ Lppcasm_maw_mainloop:
addc r11,r11,r9
#addze r12,r12
$ST r11,`1*$BNSZ`(r3)
-
+
#mul_add(rp[2],ap[2],w,c1);
$LD r8,`2*$BNSZ`(r4)
$UMULL r9,r6,r8
@@ -1944,7 +1941,7 @@ Lppcasm_maw_mainloop:
addc r9,r9,r11
#addze r10,r10
$ST r9,`2*$BNSZ`(r3)
-
+
#mul_add(rp[3],ap[3],w,c1);
$LD r8,`3*$BNSZ`(r4)
$UMULL r11,r6,r8
@@ -1958,7 +1955,7 @@ Lppcasm_maw_mainloop:
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
bdnz Lppcasm_maw_mainloop
-
+
Lppcasm_maw_leftover:
andi. r5,r5,0x3
beq Lppcasm_maw_adios
@@ -1975,10 +1972,10 @@ Lppcasm_maw_leftover:
addc r9,r9,r12
addze r12,r10
$ST r9,0(r3)
-
+
bdz Lppcasm_maw_adios
#mul_add(rp[1],ap[1],w,c1);
- $LDU r8,$BNSZ(r4)
+ $LDU r8,$BNSZ(r4)
$UMULL r9,r6,r8
$UMULH r10,r6,r8
$LDU r11,$BNSZ(r3)
@@ -1987,7 +1984,7 @@ Lppcasm_maw_leftover:
addc r9,r9,r12
addze r12,r10
$ST r9,0(r3)
-
+
bdz Lppcasm_maw_adios
#mul_add(rp[2],ap[2],w,c1);
$LDU r8,$BNSZ(r4)
@@ -1999,8 +1996,8 @@ Lppcasm_maw_leftover:
addc r9,r9,r12
addze r12,r10
$ST r9,0(r3)
-
-Lppcasm_maw_adios:
+
+Lppcasm_maw_adios:
addi r3,r12,0
blr
.long 0
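
Editorial aside, not part of the patch: the sqr_add_c / mul_add_c comments
running through the comba routines above refer to a three-word accumulator
(r9:r10:r11 or r10:r11:r12 standing in for c1:c2:c3).  A minimal C sketch of
that primitive, assuming 64-bit limbs and unsigned __int128 products (the names
are hypothetical), is:

    #include <stdint.h>

    typedef uint64_t limb;                       /* BN_ULONG on a 64-bit build */

    /* (c3:c2:c1) += a * b, with c1 the least-significant word */
    static void mul_add_c(limb a, limb b, limb *c1, limb *c2, limb *c3)
    {
        unsigned __int128 p = (unsigned __int128)a * b;
        limb lo = (limb)p, hi = (limb)(p >> 64); /* $UMULL / $UMULH           */

        *c1 += lo;                               /* addc                      */
        hi  += (*c1 < lo);                       /* fold the carry out of c1  */
        *c2 += hi;                               /* adde                      */
        *c3 += (*c2 < hi);                       /* addze                     */
    }

    /* (c3:c2:c1) += 2 * a * b, the off-diagonal terms of a square
     * (sqr_add_c2 in the comments).  The assembly doubles (hi,lo) before
     * adding; accumulating the product twice is equivalent. */
    static void sqr_add_c2(limb a, limb b, limb *c1, limb *c2, limb *c3)
    {
        mul_add_c(a, b, c1, c2, c3);
        mul_add_c(a, b, c1, c2, c3);
    }

Each bn_mul_comba4/8 and bn_sqr_comba4/8 body is this primitive applied to
every a[i]*b[j] (or a[i]*a[j]) pair in column order, storing one result word
per column.
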
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
index 5d9f43aa5d..c41b620bc2 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
@@ -35,7 +35,7 @@
# key lengths. As it's obviously inappropriate as "best all-round"
# alternative, it has to be complemented with run-time CPU family
# detection. Oh! It should also be noted that unlike other PowerPC
-# implementation IALU ppc-mont.pl module performs *suboptimaly* on
+# implementation IALU ppc-mont.pl module performs *suboptimally* on
# >=1024-bit key lengths on Power 6. It should also be noted that
# *everything* said so far applies to 64-bit builds! As far as 32-bit
# application executed on 64-bit CPU goes, this module is likely to
@@ -1353,7 +1353,7 @@ $code.=<<___;
std $t3,-16($tp) ; tp[j-1]
std $t5,-8($tp) ; tp[j]
- add $carry,$carry,$ovf ; comsume upmost overflow
+ add $carry,$carry,$ovf ; consume upmost overflow
add $t6,$t6,$carry ; can not overflow
srdi $carry,$t6,16
add $t7,$t7,$carry
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
index 0466e11a25..f1292cc75c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -1,68 +1,30 @@
#! /usr/bin/env perl
# Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
-
-
-##############################################################################
-# #
-# Copyright (c) 2012, Intel Corporation #
-# #
-# All rights reserved. #
-# #
-# Redistribution and use in source and binary forms, with or without #
-# modification, are permitted provided that the following conditions are #
-# met: #
-# #
-# * Redistributions of source code must retain the above copyright #
-# notice, this list of conditions and the following disclaimer. #
-# #
-# * Redistributions in binary form must reproduce the above copyright #
-# notice, this list of conditions and the following disclaimer in the #
-# documentation and/or other materials provided with the #
-# distribution. #
-# #
-# * Neither the name of the Intel Corporation nor the names of its #
-# contributors may be used to endorse or promote products derived from #
-# this software without specific prior written permission. #
-# #
-# #
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY #
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE #
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR #
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR #
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, #
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR #
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF #
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS #
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
-# #
-##############################################################################
-# Developers and authors: #
-# Shay Gueron (1, 2), and Vlad Krasnov (1) #
-# (1) Intel Corporation, Israel Development Center, Haifa, Israel #
-# (2) University of Haifa, Israel #
-##############################################################################
-# Reference: #
-# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular #
-# Exponentiation, Using Advanced Vector Instructions Architectures", #
-# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, #
-# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 #
-# [2] S. Gueron: "Efficient Software Implementations of Modular #
-# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). #
-# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE #
-# Proceedings of 9th International Conference on Information Technology: #
-# New Generations (ITNG 2012), pp.821-823 (2012) #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis #
-# resistant 1024-bit modular exponentiation, for optimizing RSA2048 #
-# on AVX2 capable x86_64 platforms", #
-# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest#
-##############################################################################
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular
+# Exponentiation, Using Advanced Vector Instructions Architectures",
+# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,
+# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012
+# [2] S. Gueron: "Efficient Software Implementations of Modular
+# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).
+# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE
+# Proceedings of 9th International Conference on Information Technology:
+# New Generations (ITNG 2012), pp.821-823 (2012)
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+# resistant 1024-bit modular exponentiation, for optimizing RSA2048
+# on AVX2 capable x86_64 platforms",
+# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest
#
# +13% improvement over original submission by <appro@openssl.org>
#
@@ -168,13 +130,21 @@ $code.=<<___;
.type rsaz_1024_sqr_avx2,\@function,5
.align 64
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2
+.cfi_startproc
lea (%rsp), %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
vzeroupper
___
$code.=<<___ if ($win64);
@@ -193,6 +163,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
+.cfi_def_cfa_register %rbp
mov %rdx, $np # reassigned argument
sub \$$FrameSize, %rsp
mov $np, $tmp
@@ -382,7 +353,7 @@ $code.=<<___;
vpaddq $TEMP1, $ACC1, $ACC1
vpmuludq 32*7-128($aap), $B2, $ACC2
vpbroadcastq 32*5-128($tpa), $B2
- vpaddq 32*11-448($tp1), $ACC2, $ACC2
+ vpaddq 32*11-448($tp1), $ACC2, $ACC2
vmovdqu $ACC6, 32*6-192($tp0)
vmovdqu $ACC7, 32*7-192($tp0)
@@ -441,7 +412,7 @@ $code.=<<___;
vmovdqu $ACC7, 32*16-448($tp1)
lea 8($tp1), $tp1
- dec $i
+ dec $i
jnz .LOOP_SQR_1024
___
$ZERO = $ACC9;
@@ -786,7 +757,7 @@ $code.=<<___;
vpblendd \$3, $TEMP4, $TEMP5, $TEMP4
vpaddq $TEMP3, $ACC7, $ACC7
vpaddq $TEMP4, $ACC8, $ACC8
-
+
vpsrlq \$29, $ACC4, $TEMP1
vpand $AND_MASK, $ACC4, $ACC4
vpsrlq \$29, $ACC5, $TEMP2
@@ -825,8 +796,10 @@ $code.=<<___;
vzeroall
mov %rbp, %rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
+.Lsqr_1024_in_tail:
movaps -0xd8(%rax),%xmm6
movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
@@ -840,14 +813,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue:
ret
+.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
___
}
@@ -900,13 +881,21 @@ $code.=<<___;
.type rsaz_1024_mul_avx2,\@function,5
.align 64
rsaz_1024_mul_avx2:
+.cfi_startproc
lea (%rsp), %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
vzeroupper
@@ -925,6 +914,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
+.cfi_def_cfa_register %rbp
vzeroall
mov %rdx, $bp # reassigned argument
sub \$64,%rsp
@@ -1450,15 +1440,17 @@ $code.=<<___;
vpaddq $TEMP4, $ACC8, $ACC8
vmovdqu $ACC4, 128-128($rp)
- vmovdqu $ACC5, 160-128($rp)
+ vmovdqu $ACC5, 160-128($rp)
vmovdqu $ACC6, 192-128($rp)
vmovdqu $ACC7, 224-128($rp)
vmovdqu $ACC8, 256-128($rp)
vzeroupper
mov %rbp, %rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
+.Lmul_1024_in_tail:
movaps -0xd8(%rax),%xmm6
movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
@@ -1472,14 +1464,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue:
ret
+.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
___
}
@@ -1598,8 +1598,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,\@abi-omnipotent
.align 32
rsaz_1024_gather5_avx2:
+.cfi_startproc
vzeroupper
mov %rsp,%r11
+.cfi_def_cfa_register %r11
___
$code.=<<___ if ($win64);
lea -0x88(%rsp),%rax
@@ -1737,11 +1739,13 @@ $code.=<<___ if ($win64);
movaps -0x38(%r11),%xmm13
movaps -0x28(%r11),%xmm14
movaps -0x18(%r11),%xmm15
-.LSEH_end_rsaz_1024_gather5:
___
$code.=<<___;
lea (%r11),%rsp
+.cfi_def_cfa_register %rsp
ret
+.cfi_endproc
+.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___
}
@@ -1814,14 +1818,17 @@ rsaz_se_handler:
cmp %r10,%rbx # context->Rip<prologue label
jb .Lcommon_seh_tail
- mov 152($context),%rax # pull context->Rsp
-
mov 4(%r11),%r10d # HandlerData[1]
lea (%rsi,%r10),%r10 # epilogue label
cmp %r10,%rbx # context->Rip>=epilogue label
jae .Lcommon_seh_tail
- mov 160($context),%rax # pull context->Rbp
+ mov 160($context),%rbp # pull context->Rbp
+
+ mov 8(%r11),%r10d # HandlerData[2]
+ lea (%rsi,%r10),%r10 # "in tail" label
+ cmp %r10,%rbx # context->Rip>="in tail" label
+ cmovc %rbp,%rax
mov -48(%rax),%r15
mov -40(%rax),%r14
@@ -1899,11 +1906,13 @@ rsaz_se_handler:
.LSEH_info_rsaz_1024_sqr_avx2:
.byte 9,0,0,0
.rva rsaz_se_handler
- .rva .Lsqr_1024_body,.Lsqr_1024_epilogue
+ .rva .Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail
+ .long 0
.LSEH_info_rsaz_1024_mul_avx2:
.byte 9,0,0,0
.rva rsaz_se_handler
- .rva .Lmul_1024_body,.Lmul_1024_epilogue
+ .rva .Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail
+ .long 0
.LSEH_info_rsaz_1024_gather5:
.byte 0x01,0x36,0x17,0x0b
.byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15
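
Editorial aside, not part of the patch: apart from the added CFI directives,
the functional change in the rsaz-avx2.pl hunks above is in the Win64 SEH
handler.  HandlerData gains a third entry, the .L*_in_tail label, so the
handler can tell whether the faulting address is already in the
register-restoring tail (where mov %rbp,%rax has run) and pick the frame base
accordingly; outside the prologue..epilogue window it bails out early, as the
jb/jae checks above show.  A rough C rendition of that decision is sketched
below; it assumes the frame base used in the tail is context->Rax, which the
handler loads outside the lines shown here, and the struct and names are
illustrative only.

    #include <stdint.h>

    typedef struct {
        uint64_t Rip;
        uint64_t Rax;   /* holds the frame base once the tail has started */
        uint64_t Rbp;   /* holds the frame base while the body runs       */
    } win64_context;

    /* Base address from which the handler restores r15..rbx (-48..-8). */
    static uint64_t frame_base(const win64_context *ctx, uint64_t in_tail)
    {
        /* the cmovc %rbp,%rax added above: keep Rax once Rip >= in_tail */
        return ctx->Rip >= in_tail ? ctx->Rax : ctx->Rbp;
    }

The extra label presumably lets the handler unwind correctly even when the
exception lands inside that tail.
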
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
index 6f3b664f7a..b1797b649f 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
@@ -1,68 +1,29 @@
#! /usr/bin/env perl
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
-
-
-##############################################################################
-# #
-# Copyright (c) 2012, Intel Corporation #
-# #
-# All rights reserved. #
-# #
-# Redistribution and use in source and binary forms, with or without #
-# modification, are permitted provided that the following conditions are #
-# met: #
-# #
-# * Redistributions of source code must retain the above copyright #
-# notice, this list of conditions and the following disclaimer. #
-# #
-# * Redistributions in binary form must reproduce the above copyright #
-# notice, this list of conditions and the following disclaimer in the #
-# documentation and/or other materials provided with the #
-# distribution. #
-# #
-# * Neither the name of the Intel Corporation nor the names of its #
-# contributors may be used to endorse or promote products derived from #
-# this software without specific prior written permission. #
-# #
-# #
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY #
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE #
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR #
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR #
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, #
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR #
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF #
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS #
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
-# #
-##############################################################################
-# Developers and authors: #
-# Shay Gueron (1, 2), and Vlad Krasnov (1) #
-# (1) Intel Architecture Group, Microprocessor and Chipset Development, #
-# Israel Development Center, Haifa, Israel #
-# (2) University of Haifa #
-##############################################################################
-# Reference: #
-# [1] S. Gueron, "Efficient Software Implementations of Modular #
-# Exponentiation", http://eprint.iacr.org/2011/239 #
-# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". #
-# IEEE Proceedings of 9th International Conference on Information #
-# Technology: New Generations (ITNG 2012), 821-823 (2012). #
-# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation#
-# Journal of Cryptographic Engineering 2:31-43 (2012). #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis #
-# resistant 512-bit and 1024-bit modular exponentiation for optimizing #
-# RSA1024 and RSA2048 on x86_64 platforms", #
-# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest#
-##############################################################################
-
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, "Efficient Software Implementations of Modular
+# Exponentiation", http://eprint.iacr.org/2011/239
+# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".
+# IEEE Proceedings of 9th International Conference on Information
+# Technology: New Generations (ITNG 2012), 821-823 (2012).
+# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation
+# Journal of Cryptographic Engineering 2:31-43 (2012).
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+# resistant 512-bit and 1024-bit modular exponentiation for optimizing
+# RSA1024 and RSA2048 on x86_64 platforms",
+# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest
+#
# While original submission covers 512- and 1024-bit exponentiation,
# this module is limited to 512-bit version only (and as such
# accelerates RSA1024 sign). This is because improvement for longer
@@ -138,14 +99,22 @@ $code.=<<___;
.type rsaz_512_sqr,\@function,5
.align 32
rsaz_512_sqr: # 25-29% faster than rsaz_512_mul
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
subq \$128+24, %rsp
+.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
movq $mod, %rbp # common argument
movq ($inp), %rdx
@@ -282,9 +251,9 @@ $code.=<<___;
movq %r9, 16(%rsp)
movq %r10, 24(%rsp)
shrq \$63, %rbx
-
+
#third iteration
- movq 16($inp), %r9
+ movq 16($inp), %r9
movq 24($inp), %rax
mulq %r9
addq %rax, %r12
@@ -532,7 +501,7 @@ $code.=<<___;
movl $times,128+8(%rsp)
movq $out, %xmm0 # off-load
movq %rbp, %xmm1 # off-load
-#first iteration
+#first iteration
mulx %rax, %r8, %r9
mulx 16($inp), %rcx, %r10
@@ -568,7 +537,7 @@ $code.=<<___;
mov %rax, (%rsp)
mov %r8, 8(%rsp)
-#second iteration
+#second iteration
mulx 16($inp), %rax, %rbx
adox %rax, %r10
adcx %rbx, %r11
@@ -607,8 +576,8 @@ $code.=<<___;
mov %r9, 16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp)
-
-#third iteration
+
+#third iteration
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9
adox $out, %r12
adcx %r9, %r13
@@ -643,8 +612,8 @@ $code.=<<___;
mov %r11, 32(%rsp)
.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp)
-
-#fourth iteration
+
+#fourth iteration
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx
adox %rax, %r14
adcx %rbx, %r15
@@ -676,8 +645,8 @@ $code.=<<___;
mov %r13, 48(%rsp)
mov %r14, 56(%rsp)
-
-#fifth iteration
+
+#fifth iteration
.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11
adox $out, %r8
adcx %r11, %r9
@@ -704,8 +673,8 @@ $code.=<<___;
mov %r15, 64(%rsp)
mov %r8, 72(%rsp)
-
-#sixth iteration
+
+#sixth iteration
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx
adox %rax, %r10
adcx %rbx, %r11
@@ -800,15 +769,24 @@ ___
$code.=<<___;
leaq 128+24+48(%rsp), %rax
+.cfi_def_cfa %rax,8
movq -48(%rax), %r15
+.cfi_restore %r15
movq -40(%rax), %r14
+.cfi_restore %r14
movq -32(%rax), %r13
+.cfi_restore %r13
movq -24(%rax), %r12
+.cfi_restore %r12
movq -16(%rax), %rbp
+.cfi_restore %rbp
movq -8(%rax), %rbx
+.cfi_restore %rbx
leaq (%rax), %rsp
+.cfi_def_cfa_register %rsp
.Lsqr_epilogue:
ret
+.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr
___
}
@@ -819,14 +797,22 @@ $code.=<<___;
.type rsaz_512_mul,\@function,5
.align 32
rsaz_512_mul:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
subq \$128+24, %rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_body:
movq $out, %xmm0 # off-load arguments
movq $mod, %xmm1
@@ -896,15 +882,24 @@ $code.=<<___;
call __rsaz_512_subtract
leaq 128+24+48(%rsp), %rax
+.cfi_def_cfa %rax,8
movq -48(%rax), %r15
+.cfi_restore %r15
movq -40(%rax), %r14
+.cfi_restore %r14
movq -32(%rax), %r13
+.cfi_restore %r13
movq -24(%rax), %r12
+.cfi_restore %r12
movq -16(%rax), %rbp
+.cfi_restore %rbp
movq -8(%rax), %rbx
+.cfi_restore %rbx
leaq (%rax), %rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
+.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul
___
}
@@ -915,14 +910,22 @@ $code.=<<___;
.type rsaz_512_mul_gather4,\@function,6
.align 32
rsaz_512_mul_gather4:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
subq \$`128+24+($win64?0xb0:0)`, %rsp
+.cfi_adjust_cfa_offset `128+24+($win64?0xb0:0)`
___
$code.=<<___ if ($win64);
movaps %xmm6,0xa0(%rsp)
@@ -1048,7 +1051,7 @@ $code.=<<___;
movq 56($ap), %rax
movq %rdx, %r14
adcq \$0, %r14
-
+
mulq %rbx
addq %rax, %r14
movq ($ap), %rax
@@ -1150,7 +1153,7 @@ $code.=<<___;
movq ($ap), %rax
adcq \$0, %rdx
addq %r15, %r14
- movq %rdx, %r15
+ movq %rdx, %r15
adcq \$0, %r15
leaq 8(%rdi), %rdi
@@ -1212,7 +1215,7 @@ $code.=<<___ if ($addx);
mulx 48($ap), %rbx, %r14
adcx %rax, %r12
-
+
mulx 56($ap), %rax, %r15
adcx %rbx, %r13
adcx %rax, %r14
@@ -1348,15 +1351,24 @@ $code.=<<___ if ($win64);
lea 0xb0(%rax),%rax
___
$code.=<<___;
+.cfi_def_cfa %rax,8
movq -48(%rax), %r15
+.cfi_restore %r15
movq -40(%rax), %r14
+.cfi_restore %r14
movq -32(%rax), %r13
+.cfi_restore %r13
movq -24(%rax), %r12
+.cfi_restore %r12
movq -16(%rax), %rbp
+.cfi_restore %rbp
movq -8(%rax), %rbx
+.cfi_restore %rbx
leaq (%rax), %rsp
+.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
ret
+.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
___
}
@@ -1367,15 +1379,23 @@ $code.=<<___;
.type rsaz_512_mul_scatter4,\@function,6
.align 32
rsaz_512_mul_scatter4:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
mov $pwr, $pwr
subq \$128+24, %rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq ($tbl,$pwr,8), $tbl
movq $out, %xmm0 # off-load arguments
@@ -1411,7 +1431,7 @@ $code.=<<___;
___
$code.=<<___ if ($addx);
jmp .Lmul_scatter_tail
-
+
.align 32
.Lmulx_scatter:
movq ($out), %rdx # pass b[0]
@@ -1458,15 +1478,24 @@ $code.=<<___;
movq %r15, 128*7($inp)
leaq 128+24+48(%rsp), %rax
+.cfi_def_cfa %rax,8
movq -48(%rax), %r15
+.cfi_restore %r15
movq -40(%rax), %r14
+.cfi_restore %r14
movq -32(%rax), %r13
+.cfi_restore %r13
movq -24(%rax), %r12
+.cfi_restore %r12
movq -16(%rax), %rbp
+.cfi_restore %rbp
movq -8(%rax), %rbx
+.cfi_restore %rbx
leaq (%rax), %rsp
+.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
ret
+.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
___
}
@@ -1477,14 +1506,22 @@ $code.=<<___;
.type rsaz_512_mul_by_one,\@function,4
.align 32
rsaz_512_mul_by_one:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
subq \$128+24, %rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
___
$code.=<<___ if ($addx);
@@ -1539,15 +1576,24 @@ $code.=<<___;
movq %r15, 56($out)
leaq 128+24+48(%rsp), %rax
+.cfi_def_cfa %rax,8
movq -48(%rax), %r15
+.cfi_restore %r15
movq -40(%rax), %r14
+.cfi_restore %r14
movq -32(%rax), %r13
+.cfi_restore %r13
movq -24(%rax), %r12
+.cfi_restore %r12
movq -16(%rax), %rbp
+.cfi_restore %rbp
movq -8(%rax), %rbx
+.cfi_restore %rbx
leaq (%rax), %rsp
+.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
ret
+.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
___
}
@@ -1824,7 +1870,7 @@ __rsaz_512_mul:
movq 56($ap), %rax
movq %rdx, %r14
adcq \$0, %r14
-
+
mulq %rbx
addq %rax, %r14
movq ($ap), %rax
@@ -1901,7 +1947,7 @@ __rsaz_512_mul:
movq ($ap), %rax
adcq \$0, %rdx
addq %r15, %r14
- movq %rdx, %r15
+ movq %rdx, %r15
adcq \$0, %r15
leaq 8(%rdi), %rdi
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
index cbd16f4214..06181bf9b9 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
@@ -20,7 +20,7 @@
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... gcc 4.3 appeared to generate poor code, therefore
# the effort. And indeed, the module delivers 55%-90%(*) improvement
-# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
+# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
# This is for 64-bit build. In 32-bit "highgprs" case improvement is
# even higher, for example on z990 it was measured 80%-150%. ECDSA
@@ -198,7 +198,7 @@ $code.=<<___;
xgr $hi,@r[1]
xgr $lo,@r[0]
xgr $hi,@r[2]
- xgr $lo,@r[3]
+ xgr $lo,@r[3]
xgr $hi,@r[3]
xgr $lo,$hi
stg $hi,16($rp)
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
index 66780cdf80..c2fc5adffe 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
index 4f339b2279..fcae9cfc5b 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
@@ -8,9 +8,9 @@
# ====================================================================
-# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
-# <appro@openssl.org>. The module is licensed under 2-clause BSD
-# license. November 2012. All rights reserved.
+# Written by David S. Miller and Andy Polyakov
+# The module is licensed under 2-clause BSD license.
+# November 2012. All rights reserved.
# ====================================================================
######################################################################
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
index 9c31073b24..75d72eb92c 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
@@ -1,5 +1,5 @@
.ident "sparcv8.s, Version 1.4"
-.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@openssl.org>"
/*
* ====================================================================
@@ -13,7 +13,7 @@
*/
/*
- * This is my modest contributon to OpenSSL project (see
+ * This is my modest contribution to OpenSSL project (see
* http://www.openssl.org/ for more information about it) and is
* a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
* module. For updates see http://fy.chalmers.se/~appro/hpe/.
@@ -159,12 +159,12 @@ bn_mul_add_words:
*/
bn_mul_words:
cmp %o2,0
- bg,a .L_bn_mul_words_proceeed
+ bg,a .L_bn_mul_words_proceed
ld [%o1],%g2
retl
clr %o0
-.L_bn_mul_words_proceeed:
+.L_bn_mul_words_proceed:
andcc %o2,-4,%g0
bz .L_bn_mul_words_tail
clr %o5
@@ -251,12 +251,12 @@ bn_mul_words:
*/
bn_sqr_words:
cmp %o2,0
- bg,a .L_bn_sqr_words_proceeed
+ bg,a .L_bn_sqr_words_proceed
ld [%o1],%g2
retl
clr %o0
-.L_bn_sqr_words_proceeed:
+.L_bn_sqr_words_proceed:
andcc %o2,-4,%g0
bz .L_bn_sqr_words_tail
clr %o5
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
index 714a136675..fe4699b2bd 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
@@ -1,5 +1,5 @@
.ident "sparcv8plus.s, Version 1.4"
-.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@openssl.org>"
/*
* ====================================================================
@@ -13,7 +13,7 @@
*/
/*
- * This is my modest contributon to OpenSSL project (see
+ * This is my modest contribution to OpenSSL project (see
* http://www.openssl.org/ for more information about it) and is
* a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
* module. For updates see http://fy.chalmers.se/~appro/hpe/.
@@ -144,10 +144,6 @@
* }
*/
-#ifdef OPENSSL_FIPSCANISTER
-#include <openssl/fipssyms.h>
-#endif
-
#if defined(__SUNPRO_C) && defined(__sparcv9)
/* They've said -xarch=v9 at command line */
.register %g2,#scratch
@@ -282,7 +278,7 @@ bn_mul_add_words:
*/
bn_mul_words:
sra %o2,%g0,%o2 ! signx %o2
- brgz,a %o2,.L_bn_mul_words_proceeed
+ brgz,a %o2,.L_bn_mul_words_proceed
lduw [%o1],%g2
retl
clr %o0
@@ -290,7 +286,7 @@ bn_mul_words:
nop
nop
-.L_bn_mul_words_proceeed:
+.L_bn_mul_words_proceed:
srl %o3,%g0,%o3 ! clruw %o3
andcc %o2,-4,%g0
bz,pn %icc,.L_bn_mul_words_tail
@@ -370,7 +366,7 @@ bn_mul_words:
*/
bn_sqr_words:
sra %o2,%g0,%o2 ! signx %o2
- brgz,a %o2,.L_bn_sqr_words_proceeed
+ brgz,a %o2,.L_bn_sqr_words_proceed
lduw [%o1],%g2
retl
clr %o0
@@ -378,7 +374,7 @@ bn_sqr_words:
nop
nop
-.L_bn_sqr_words_proceeed:
+.L_bn_sqr_words_proceed:
andcc %o2,-4,%g0
nop
bz,pn %icc,.L_bn_sqr_words_tail
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
index 074f9df14b..b41903af98 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -612,7 +612,7 @@ $code.=<<___;
add $tp,8,$tp
.type $fname,#function
.size $fname,(.-$fname)
-.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align 32
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
index 50b690653f..c8f759df9f 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -865,7 +865,7 @@ $fname:
restore
.type $fname,#function
.size $fname,(.-$fname)
-.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz "Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
.align 32
___
diff --git a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
index 9d65a146a2..9cf717e841 100644
--- a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
@@ -8,7 +8,7 @@
#
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -76,7 +76,7 @@
# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
#
-# Conclusions:
+# Conclusions:
# - VIA SDK leaves a *lot* of room for improvement (which this
# implementation successfully fills:-);
# - 'rep montmul' gives up to >3x performance improvement depending on
@@ -91,7 +91,7 @@ require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
-&asm_init($ARGV[0],"via-mont.pl");
+&asm_init($ARGV[0]);
# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
$func="bn_mul_mont_padlock";
diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
index ba34b36a81..04833a0c87 100644
--- a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
@@ -16,7 +16,7 @@
# October 2012.
#
-# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and
+# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and
# onward. There are three new instructions used here: umulxhi,
# addxc[cc] and initializing store. On T3 RSA private key operations
# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
index f464368733..d03efcc750 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
@@ -46,7 +46,7 @@ require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
-&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -152,7 +152,7 @@ $R="mm0";
&xor ($a4,$a2); # a2=a4^a2^a4
&mov (&DWP(5*4,"esp"),$a1); # a1^a4
&xor ($a4,$a1); # a1^a2^a4
- &sar (@i[1],31); # broardcast 30th bit
+ &sar (@i[1],31); # broadcast 30th bit
&and ($lo,$b);
&mov (&DWP(6*4,"esp"),$a2); # a2^a4
&and (@i[1],$b);
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
index f1abcc5b4c..7ba2133ac9 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -39,8 +39,8 @@ require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
-
-&asm_init($ARGV[0],$0);
+
+&asm_init($ARGV[0]);
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -78,7 +78,7 @@ $frame=32; # size of above frame rounded up to 16n
&lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2))
&neg ("edi");
- # minimize cache contention by arraning 2K window between stack
+ # minimize cache contention by arranging 2K window between stack
# pointer and ap argument [np is also position sensitive vector,
# but it's assumed to be near ap, as it's allocated at ~same
# time].
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
index 621be33054..31839ba060 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
@@ -14,7 +14,7 @@
/*-
* x86_64 BIGNUM accelerator version 0.1, December 2002.
*
- * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+ * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL
* project.
*
* Rights for redistribution and usage in source and binary forms are
@@ -114,7 +114,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
BN_ULONG c1 = 0;
if (num <= 0)
- return (c1);
+ return c1;
while (num & ~3) {
mul_add(rp[0], ap[0], w, c1);
@@ -136,7 +136,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
return c1;
}
- return (c1);
+ return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -144,7 +144,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
BN_ULONG c1 = 0;
if (num <= 0)
- return (c1);
+ return c1;
while (num & ~3) {
mul(rp[0], ap[0], w, c1);
@@ -164,7 +164,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
return c1;
mul(rp[2], ap[2], w, c1);
}
- return (c1);
+ return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -264,7 +264,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
int c = 0;
if (n <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
for (;;) {
t1 = a[0];
@@ -303,7 +303,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
b += 4;
r += 4;
}
- return (c);
+ return c;
}
# endif
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
index d962f62033..0fd6e985d7 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -54,7 +54,9 @@ $code.=<<___;
.type _mul_1x1,\@abi-omnipotent
.align 16
_mul_1x1:
+.cfi_startproc
sub \$128+8,%rsp
+.cfi_adjust_cfa_offset 128+8
mov \$-1,$a1
lea ($a,$a),$i0
shr \$3,$a1
@@ -66,7 +68,7 @@ _mul_1x1:
sar \$63,$i0 # broadcast 62nd bit
lea (,$a1,4),$a4
and $b,$a
- sar \$63,$i1 # boardcast 61st bit
+ sar \$63,$i1 # broadcast 61st bit
mov $a,$hi # $a is $lo
shl \$63,$lo
and $b,$i0
@@ -160,8 +162,10 @@ $code.=<<___;
xor $i1,$hi
add \$128+8,%rsp
+.cfi_adjust_cfa_offset -128-8
ret
.Lend_mul_1x1:
+.cfi_endproc
.size _mul_1x1,.-_mul_1x1
___
@@ -174,8 +178,10 @@ $code.=<<___;
.type bn_GF2m_mul_2x2,\@abi-omnipotent
.align 16
bn_GF2m_mul_2x2:
- mov OPENSSL_ia32cap_P(%rip),%rax
- bt \$33,%rax
+.cfi_startproc
+ mov %rsp,%rax
+ mov OPENSSL_ia32cap_P(%rip),%r10
+ bt \$33,%r10
jnc .Lvanilla_mul_2x2
movq $a1,%xmm0
@@ -210,6 +216,7 @@ $code.=<<___;
.align 16
.Lvanilla_mul_2x2:
lea -8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset 8*17
___
$code.=<<___ if ($win64);
mov `8*17+40`(%rsp),$b0
@@ -218,10 +225,15 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %r14,8*10(%rsp)
+.cfi_rel_offset %r14,8*10
mov %r13,8*11(%rsp)
+.cfi_rel_offset %r13,8*11
mov %r12,8*12(%rsp)
+.cfi_rel_offset %r12,8*12
mov %rbp,8*13(%rsp)
+.cfi_rel_offset %rbp,8*13
mov %rbx,8*14(%rsp)
+.cfi_rel_offset %rbx,8*14
.Lbody_mul_2x2:
mov $rp,32(%rsp) # save the arguments
mov $a1,40(%rsp)
@@ -269,10 +281,15 @@ $code.=<<___;
mov $lo,8(%rbp)
mov 8*10(%rsp),%r14
+.cfi_restore %r14
mov 8*11(%rsp),%r13
+.cfi_restore %r13
mov 8*12(%rsp),%r12
+.cfi_restore %r12
mov 8*13(%rsp),%rbp
+.cfi_restore %rbp
mov 8*14(%rsp),%rbx
+.cfi_restore %rbx
___
$code.=<<___ if ($win64);
mov 8*15(%rsp),%rdi
@@ -280,8 +297,11 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea 8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset -8*17
+.Lepilogue_mul_2x2:
ret
.Lend_mul_2x2:
+.cfi_endproc
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 16
@@ -312,13 +332,19 @@ se_handler:
pushfq
sub \$64,%rsp
- mov 152($context),%rax # pull context->Rsp
+ mov 120($context),%rax # pull context->Rax
mov 248($context),%rbx # pull context->Rip
lea .Lbody_mul_2x2(%rip),%r10
cmp %r10,%rbx # context->Rip<"prologue" label
jb .Lin_prologue
+ mov 152($context),%rax # pull context->Rsp
+
+ lea .Lepilogue_mul_2x2(%rip),%r10
+ cmp %r10,%rbx # context->Rip>="epilogue" label
+ jae .Lin_prologue
+
mov 8*10(%rax),%r14 # mimic epilogue
mov 8*11(%rax),%r13
mov 8*12(%rax),%r12
@@ -335,8 +361,9 @@ se_handler:
mov %r13,224($context) # restore context->R13
mov %r14,232($context) # restore context->R14
-.Lin_prologue:
lea 8*17(%rax),%rax
+
+.Lin_prologue:
mov %rax,152($context) # restore context->Rsp
mov 40($disp),%rdi # disp->ContextRecord
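
[Editor's note, not part of the patch] The se_handler hunks above change the Win64 unwind handler so that, before the prologue has completed (and at or after the epilogue label), the original stack pointer is taken from the saved context->Rax rather than context->Rsp. The following C sketch is purely illustrative: it restates that decision logic against the Win64 CONTEXT structure, with unwind_frame, body_label and epilogue_label being made-up names standing in for the real handler and labels.

    #include <windows.h>

    /*
     * Illustrative sketch of the unwind decision in the patched handler:
     * %rax holds the caller's %rsp for the whole function, so it is the
     * safe source of the frame pointer whenever the locals are not (or
     * are no longer) on the stack.
     */
    static void unwind_frame(CONTEXT *ctx,
                             ULONG64 body_label, ULONG64 epilogue_label)
    {
        ULONG64 frame;

        if (ctx->Rip < body_label) {
            frame = ctx->Rax;           /* prologue unfinished: use saved %rax */
        } else {
            frame = ctx->Rsp;
            if (ctx->Rip < epilogue_label) {
                /* inside the body: mimic the epilogue register restores */
                ctx->R14 = *(ULONG64 *)(frame + 8 * 10);
                ctx->R13 = *(ULONG64 *)(frame + 8 * 11);
                ctx->R12 = *(ULONG64 *)(frame + 8 * 12);
                ctx->Rbp = *(ULONG64 *)(frame + 8 * 13);
                ctx->Rbx = *(ULONG64 *)(frame + 8 * 14);
                frame += 8 * 17;        /* undo the local stack allocation */
            }
        }
        ctx->Rsp = frame;               /* what the handler writes back */
    }
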
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
index 8d2fb2cebb..c051135e30 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
@@ -104,8 +104,10 @@ $code=<<___;
.type bn_mul_mont,\@function,6
.align 16
bn_mul_mont:
+.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
+.cfi_def_cfa_register %rax
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
@@ -124,11 +126,17 @@ $code.=<<___;
.align 16
.Lmul_enter:
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
neg $num
mov %rsp,%r11
@@ -161,6 +169,7 @@ $code.=<<___;
.Lmul_page_walk_done:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
+.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body:
mov $bp,%r12 # reassign $bp
___
@@ -309,7 +318,7 @@ $code.=<<___;
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
mov 8(%rsp,$i,8),%rax # tp[i+1]
lea 1($i),$i # i++
- dec $j # doesnn't affect CF!
+ dec $j # doesn't affect CF!
jnz .Lsub
sbb \$0,%rax # handle upmost overflow bit
@@ -331,16 +340,25 @@ $code.=<<___;
jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
+.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
___
{{{
@@ -350,8 +368,10 @@ $code.=<<___;
.type bn_mul4x_mont,\@function,6
.align 16
bn_mul4x_mont:
+.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
@@ -361,11 +381,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
neg $num
mov %rsp,%r11
@@ -389,6 +415,7 @@ $code.=<<___;
.Lmul4x_page_walk_done:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
+.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul4x_body:
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
mov %rdx,%r12 # reassign $bp
@@ -721,7 +748,7 @@ $code.=<<___;
mov 56($ap,$i,8),@ri[3]
sbb 40($np,$i,8),@ri[1]
lea 4($i),$i # i++
- dec $j # doesnn't affect CF!
+ dec $j # doesn't affect CF!
jnz .Lsub4x
mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i]
@@ -766,16 +793,25 @@ ___
}
$code.=<<___;
mov 8(%rsp,$num,8),%rsi # restore %rsp
+.cfi_def_cfa %rsi, 8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
ret
+.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
___
}}}
@@ -803,14 +839,22 @@ $code.=<<___;
.type bn_sqr8x_mont,\@function,6
.align 32
bn_sqr8x_mont:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lsqr8x_enter:
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lsqr8x_prologue:
mov ${num}d,%r10d
@@ -866,6 +910,7 @@ bn_sqr8x_mont:
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lsqr8x_body:
movq $nptr, %xmm2 # save pointer to modulus
@@ -935,6 +980,7 @@ $code.=<<___;
pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy
.align 32
@@ -964,14 +1010,22 @@ $code.=<<___;
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
ret
+.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
___
}}}
@@ -983,14 +1037,22 @@ $code.=<<___;
.type bn_mulx4x_mont,\@function,6
.align 32
bn_mulx4x_mont:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes
@@ -1036,6 +1098,7 @@ bn_mulx4x_mont:
mov $n0, 24(%rsp) # save *n0
mov $rp, 32(%rsp) # save $rp
mov %rax,40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
mov $num,48(%rsp) # inner counter
jmp .Lmulx4x_body
@@ -1285,6 +1348,7 @@ $code.=<<___;
pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy
.align 32
@@ -1314,14 +1378,22 @@ $code.=<<___;
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
ret
+.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
___
}}}
@@ -1400,12 +1472,12 @@ sqr_handler:
mov 0(%r11),%r10d # HandlerData[0]
lea (%rsi,%r10),%r10 # end of prologue label
- cmp %r10,%rbx # context->Rip<.Lsqr_body
+ cmp %r10,%rbx # context->Rip<.Lsqr_prologue
jb .Lcommon_seh_tail
mov 4(%r11),%r10d # HandlerData[1]
lea (%rsi,%r10),%r10 # body label
- cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue
+ cmp %r10,%rbx # context->Rip<.Lsqr_body
jb .Lcommon_pop_regs
mov 152($context),%rax # pull context->Rsp
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
index 97d8eee700..ad6e8ada3c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
@@ -93,8 +93,10 @@ $code=<<___;
.type bn_mul_mont_gather5,\@function,6
.align 64
bn_mul_mont_gather5:
+.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
+.cfi_def_cfa_register %rax
test \$7,${num}d
jnz .Lmul_enter
___
@@ -108,11 +110,17 @@ $code.=<<___;
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
neg $num
mov %rsp,%r11
@@ -145,6 +153,7 @@ $code.=<<___;
lea .Linc(%rip),%r10
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
+.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body:
lea 128($bp),%r12 # reassign $bp (+size optimization)
@@ -410,7 +419,7 @@ $code.=<<___;
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
mov 8($ap,$i,8),%rax # tp[i+1]
lea 1($i),$i # i++
- dec $j # doesnn't affect CF!
+ dec $j # doesn't affect CF!
jnz .Lsub
sbb \$0,%rax # handle upmost overflow bit
@@ -432,17 +441,26 @@ $code.=<<___;
jnz .Lcopy
mov 8(%rsp,$num,8),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
+.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
___
{{{
@@ -452,8 +470,10 @@ $code.=<<___;
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
+.cfi_startproc
.byte 0x67
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
@@ -463,11 +483,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lmul4x_prologue:
.byte 0x67
@@ -523,22 +549,32 @@ $code.=<<___;
neg $num
mov %rax,40(%rsp)
+.cfi_cfa_expression %rsp+40,deref,+8
.Lmul4x_body:
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
ret
+.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,\@abi-omnipotent
@@ -1050,7 +1086,7 @@ my $bptr="%rdx"; # const void *table,
my $nptr="%rcx"; # const BN_ULONG *nptr,
my $n0 ="%r8"; # const BN_ULONG *n0);
my $num ="%r9"; # int num, has to be divisible by 8
- # int pwr
+ # int pwr
my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
my @A0=("%r10","%r11");
@@ -1062,7 +1098,9 @@ $code.=<<___;
.type bn_power5,\@function,6
.align 32
bn_power5:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -1072,11 +1110,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lpower5_prologue:
shl \$3,${num}d # convert $num to bytes
@@ -1127,7 +1171,7 @@ $code.=<<___;
ja .Lpwr_page_walk
.Lpwr_page_walk_done:
- mov $num,%r10
+ mov $num,%r10
neg $num
##############################################################
@@ -1141,6 +1185,7 @@ $code.=<<___;
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lpower5_body:
movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr
@@ -1167,16 +1212,25 @@ $code.=<<___;
call mul4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
ret
+.cfi_endproc
.size bn_power5,.-bn_power5
.globl bn_sqr8x_internal
@@ -2036,7 +2090,7 @@ __bn_post4x_internal:
jnz .Lsqr4x_sub
mov $num,%r10 # prepare for back-to-back call
- neg $num # restore $num
+ neg $num # restore $num
ret
.size __bn_post4x_internal,.-__bn_post4x_internal
___
@@ -2056,14 +2110,22 @@ bn_from_montgomery:
.type bn_from_mont8x,\@function,6
.align 32
bn_from_mont8x:
+.cfi_startproc
.byte 0x67
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lfrom_prologue:
shl \$3,${num}d # convert $num to bytes
@@ -2128,6 +2190,7 @@ bn_from_mont8x:
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lfrom_body:
mov $num,%r11
lea 48(%rsp),%rax
@@ -2171,7 +2234,6 @@ $code.=<<___ if ($addx);
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
- mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero
.align 32
@@ -2183,11 +2245,12 @@ $code.=<<___;
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
- mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
+ mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax)
@@ -2198,14 +2261,22 @@ $code.=<<___;
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
ret
+.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
___
}
@@ -2218,14 +2289,22 @@ $code.=<<___;
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lmulx4x_prologue:
shl \$3,${num}d # convert $num to bytes
@@ -2259,7 +2338,7 @@ bn_mulx4x_mont_gather5:
mov \$0,%r10
cmovc %r10,%r11
sub %r11,%rbp
-.Lmulx4xsp_done:
+.Lmulx4xsp_done:
and \$-64,%rbp # ensure alignment
mov %rsp,%r11
sub %rbp,%r11
@@ -2291,21 +2370,31 @@ bn_mulx4x_mont_gather5:
#
mov $n0, 32(%rsp) # save *n0
mov %rax,40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lmulx4x_body:
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
ret
+.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,\@abi-omnipotent
@@ -2333,7 +2422,7 @@ my $N=$STRIDE/4; # should match cache line size
$code.=<<___;
movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
- lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton)
+ lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimization)
lea 128($bp),$bptr # size optimization
pshufd \$0,%xmm5,%xmm5 # broadcast index
@@ -2683,14 +2772,22 @@ $code.=<<___;
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
.Lpowerx5_enter:
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lpowerx5_prologue:
shl \$3,${num}d # convert $num to bytes
@@ -2741,7 +2838,7 @@ bn_powerx5:
ja .Lpwrx_page_walk
.Lpwrx_page_walk_done:
- mov $num,%r10
+ mov $num,%r10
neg $num
##############################################################
@@ -2762,6 +2859,7 @@ bn_powerx5:
movq $bptr,%xmm4
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
+.cfi_cfa_expression %rsp+40,deref,+8
.Lpowerx5_body:
call __bn_sqrx8x_internal
@@ -2784,17 +2882,26 @@ bn_powerx5:
call mulx4x_internal
mov 40(%rsp),%rsi # restore %rsp
+.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
ret
+.cfi_endproc
.size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal
@@ -3678,8 +3785,8 @@ mul_handler:
jb .Lcommon_seh_tail
mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
+ lea (%rsi,%r10),%r10 # beginning of body label
+ cmp %r10,%rbx # context->Rip<body label
jb .Lcommon_pop_regs
mov 152($context),%rax # pull context->Rsp
diff --git a/deps/openssl/openssl/crypto/bn/bn_add.c b/deps/openssl/openssl/crypto/bn/bn_add.c
index 7cdefa77a1..f2736b8f6d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_add.c
+++ b/deps/openssl/openssl/crypto/bn/bn_add.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -10,51 +10,69 @@
#include "internal/cryptlib.h"
#include "bn_lcl.h"
-/* r can == a or b */
+/* signed add of b to a. */
int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
- int a_neg = a->neg, ret;
+ int ret, r_neg, cmp_res;
bn_check_top(a);
bn_check_top(b);
- /*-
- * a + b a+b
- * a + -b a-b
- * -a + b b-a
- * -a + -b -(a+b)
- */
- if (a_neg ^ b->neg) {
- /* only one is negative */
- if (a_neg) {
- const BIGNUM *tmp;
-
- tmp = a;
- a = b;
- b = tmp;
+ if (a->neg == b->neg) {
+ r_neg = a->neg;
+ ret = BN_uadd(r, a, b);
+ } else {
+ cmp_res = BN_ucmp(a, b);
+ if (cmp_res > 0) {
+ r_neg = a->neg;
+ ret = BN_usub(r, a, b);
+ } else if (cmp_res < 0) {
+ r_neg = b->neg;
+ ret = BN_usub(r, b, a);
+ } else {
+ r_neg = 0;
+ BN_zero(r);
+ ret = 1;
}
+ }
+
+ r->neg = r_neg;
+ bn_check_top(r);
+ return ret;
+}
+
+/* signed sub of b from a. */
+int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+ int ret, r_neg, cmp_res;
- /* we are now a - b */
+ bn_check_top(a);
+ bn_check_top(b);
- if (BN_ucmp(a, b) < 0) {
- if (!BN_usub(r, b, a))
- return 0;
- r->neg = 1;
+ if (a->neg != b->neg) {
+ r_neg = a->neg;
+ ret = BN_uadd(r, a, b);
+ } else {
+ cmp_res = BN_ucmp(a, b);
+ if (cmp_res > 0) {
+ r_neg = a->neg;
+ ret = BN_usub(r, a, b);
+ } else if (cmp_res < 0) {
+ r_neg = !b->neg;
+ ret = BN_usub(r, b, a);
} else {
- if (!BN_usub(r, a, b))
- return 0;
- r->neg = 0;
+ r_neg = 0;
+ BN_zero(r);
+ ret = 1;
}
- return 1;
}
- ret = BN_uadd(r, a, b);
- r->neg = a_neg;
+ r->neg = r_neg;
bn_check_top(r);
return ret;
}
-/* unsigned add of b to a */
+/* unsigned add of b to a, r can be equal to a or b. */
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max, min, dif;
@@ -151,59 +169,3 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
return 1;
}
-int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
-{
- int max;
- int add = 0, neg = 0;
-
- bn_check_top(a);
- bn_check_top(b);
-
- /*-
- * a - b a-b
- * a - -b a+b
- * -a - b -(a+b)
- * -a - -b b-a
- */
- if (a->neg) {
- if (b->neg) {
- const BIGNUM *tmp;
-
- tmp = a;
- a = b;
- b = tmp;
- } else {
- add = 1;
- neg = 1;
- }
- } else {
- if (b->neg) {
- add = 1;
- neg = 0;
- }
- }
-
- if (add) {
- if (!BN_uadd(r, a, b))
- return 0;
- r->neg = neg;
- return 1;
- }
-
- /* We are actually doing a - b :-) */
-
- max = (a->top > b->top) ? a->top : b->top;
- if (bn_wexpand(r, max) == NULL)
- return 0;
- if (BN_ucmp(a, b) < 0) {
- if (!BN_usub(r, b, a))
- return 0;
- r->neg = 1;
- } else {
- if (!BN_usub(r, a, b))
- return 0;
- r->neg = 0;
- }
- bn_check_top(r);
- return 1;
-}
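
[Editor's note, not part of the patch] The bn_add.c rewrite above makes BN_add and BN_sub compute the result sign once (from the operand signs and an unsigned comparison) and then delegate the arithmetic to BN_uadd/BN_usub. A minimal usage sketch of the signed semantics follows; the numeric values are made up, the BN_* calls are the standard OpenSSL BIGNUM API.

    #include <stdio.h>
    #include <openssl/bn.h>

    int main(void)
    {
        BIGNUM *a = BN_new(), *b = BN_new(), *r = BN_new();
        char *s;

        BN_set_word(a, 5);
        BN_set_word(b, 9);
        BN_set_negative(b, 1);           /* b = -9 */

        BN_add(r, a, b);                 /* 5 + (-9): |b| > |a|, so r = -(9 - 5) */
        s = BN_bn2dec(r);
        printf("5 + (-9) = %s\n", s);    /* prints -4 */
        OPENSSL_free(s);

        BN_sub(r, b, a);                 /* (-9) - 5: opposite signs, so r = -(9 + 5) */
        s = BN_bn2dec(r);
        printf("-9 - 5  = %s\n", s);     /* prints -14 */
        OPENSSL_free(s);

        BN_free(a); BN_free(b); BN_free(r);
        return 0;
    }
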
diff --git a/deps/openssl/openssl/crypto/bn/bn_asm.c b/deps/openssl/openssl/crypto/bn/bn_asm.c
index 39c6c2134b..729b2480ac 100644
--- a/deps/openssl/openssl/crypto/bn/bn_asm.c
+++ b/deps/openssl/openssl/crypto/bn/bn_asm.c
@@ -21,7 +21,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
assert(num >= 0);
if (num <= 0)
- return (c1);
+ return c1;
# ifndef OPENSSL_SMALL_FOOTPRINT
while (num & ~3) {
@@ -41,7 +41,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
num--;
}
- return (c1);
+ return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -50,7 +50,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
assert(num >= 0);
if (num <= 0)
- return (c1);
+ return c1;
# ifndef OPENSSL_SMALL_FOOTPRINT
while (num & ~3) {
@@ -69,7 +69,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
rp++;
num--;
}
- return (c1);
+ return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -108,7 +108,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
assert(num >= 0);
if (num <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
bl = LBITS(w);
bh = HBITS(w);
@@ -130,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
rp++;
num--;
}
- return (c);
+ return c;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -140,7 +140,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
assert(num >= 0);
if (num <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
bl = LBITS(w);
bh = HBITS(w);
@@ -162,7 +162,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
rp++;
num--;
}
- return (carry);
+ return carry;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -210,7 +210,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
int i, count = 2;
if (d == 0)
- return (BN_MASK2);
+ return BN_MASK2;
i = BN_num_bits_word(d);
assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
@@ -264,7 +264,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
l = (l & BN_MASK2l) << BN_BITS4;
}
ret |= q;
- return (ret);
+ return ret;
}
#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */
@@ -276,7 +276,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
assert(n >= 0);
if (n <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
# ifndef OPENSSL_SMALL_FOOTPRINT
while (n & ~3) {
@@ -307,7 +307,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
r++;
n--;
}
- return ((BN_ULONG)ll);
+ return (BN_ULONG)ll;
}
#else /* !BN_LLONG */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
@@ -317,7 +317,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
assert(n >= 0);
if (n <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
c = 0;
# ifndef OPENSSL_SMALL_FOOTPRINT
@@ -364,7 +364,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
r++;
n--;
}
- return ((BN_ULONG)c);
+ return (BN_ULONG)c;
}
#endif /* !BN_LLONG */
@@ -376,7 +376,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
assert(n >= 0);
if (n <= 0)
- return ((BN_ULONG)0);
+ return (BN_ULONG)0;
#ifndef OPENSSL_SMALL_FOOTPRINT
while (n & ~3) {
@@ -417,7 +417,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
r++;
n--;
}
- return (c);
+ return c;
}
#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
diff --git a/deps/openssl/openssl/crypto/bn/bn_blind.c b/deps/openssl/openssl/crypto/bn/bn_blind.c
index 9474e21e4c..450cdfb348 100644
--- a/deps/openssl/openssl/crypto/bn/bn_blind.c
+++ b/deps/openssl/openssl/crypto/bn/bn_blind.c
@@ -82,7 +82,6 @@ void BN_BLINDING_free(BN_BLINDING *r)
{
if (r == NULL)
return;
-
BN_free(r->A);
BN_free(r->Ai);
BN_free(r->e);
@@ -124,7 +123,7 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
err:
if (b->counter == BN_BLINDING_COUNTER)
b->counter = 0;
- return (ret);
+ return ret;
}
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
@@ -140,14 +139,14 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
if ((b->A == NULL) || (b->Ai == NULL)) {
BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED);
- return (0);
+ return 0;
}
if (b->counter == -1)
/* Fresh blinding, doesn't need updating. */
b->counter = 0;
else if (!BN_BLINDING_update(b, ctx))
- return (0);
+ return 0;
if (r != NULL && (BN_copy(r, b->Ai) == NULL))
return 0;
@@ -198,7 +197,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
}
bn_check_top(n);
- return (ret);
+ return ret;
}
int BN_BLINDING_is_current_thread(BN_BLINDING *b)
@@ -271,7 +270,7 @@ BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
do {
int rv;
- if (!BN_rand_range(ret->A, ret->mod))
+ if (!BN_priv_rand_range(ret->A, ret->mod))
goto err;
if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv))
break;
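
[Editor's note, not part of the patch] The bn_blind.c hunk above switches the blinding-factor generation from BN_rand_range to BN_priv_rand_range, i.e. to the random source reserved for secret values. A minimal sketch of the call as now used, with pick_blinding_factor being a made-up wrapper name and mod standing in for the modulus:

    #include <openssl/bn.h>

    /* Sketch only: draw a candidate blinding factor A in [0, mod). */
    static int pick_blinding_factor(BIGNUM *A, const BIGNUM *mod)
    {
        return BN_priv_rand_range(A, mod);   /* 1 on success, 0 on error */
    }
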
diff --git a/deps/openssl/openssl/crypto/bn/bn_ctx.c b/deps/openssl/openssl/crypto/bn/bn_ctx.c
index 68c0468743..aa08b31a34 100644
--- a/deps/openssl/openssl/crypto/bn/bn_ctx.c
+++ b/deps/openssl/openssl/crypto/bn/bn_ctx.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -255,9 +255,12 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx)
/* Need to expand */
unsigned int newsize =
st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES;
- unsigned int *newitems = OPENSSL_malloc(sizeof(*newitems) * newsize);
- if (newitems == NULL)
+ unsigned int *newitems;
+
+ if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) {
+ BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
return 0;
+ }
if (st->depth)
memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth);
OPENSSL_free(st->indexes);
@@ -306,9 +309,12 @@ static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
/* Full; allocate a new pool item and link it in. */
if (p->used == p->size) {
- BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(*item));
- if (item == NULL)
+ BN_POOL_ITEM *item;
+
+ if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) {
+ BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE);
return NULL;
+ }
for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) {
bn_init(bn);
if ((flag & BN_FLG_SECURE) != 0)
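
[Editor's note, not part of the patch] The bn_ctx.c hunks above change the internal allocations to report ERR_R_MALLOC_FAILURE via BNerr instead of failing silently. A short sketch of that allocate-check-report idiom, assuming the function codes from the regenerated bn_err.c; grow_index_stack is a made-up name, not the library's:

    #include <string.h>
    #include <openssl/bn.h>

    /* Sketch: allocate, queue an error on failure, then copy and swap. */
    static unsigned int *grow_index_stack(unsigned int *old, size_t depth,
                                          size_t newsize)
    {
        unsigned int *fresh = OPENSSL_malloc(sizeof(*fresh) * newsize);

        if (fresh == NULL) {
            BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
            return NULL;
        }
        if (depth > 0)
            memcpy(fresh, old, sizeof(*fresh) * depth);
        OPENSSL_free(old);
        return fresh;
    }
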
diff --git a/deps/openssl/openssl/crypto/bn/bn_dh.c b/deps/openssl/openssl/crypto/bn/bn_dh.c
index 17d05597b3..38acdee234 100644
--- a/deps/openssl/openssl/crypto/bn/bn_dh.c
+++ b/deps/openssl/openssl/crypto/bn/bn_dh.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -8,7 +8,7 @@
*/
#include "bn_lcl.h"
-#include "e_os.h"
+#include "internal/nelem.h"
#ifndef OPENSSL_NO_DH
#include <openssl/dh.h>
@@ -104,6 +104,146 @@ static const BN_ULONG dh2048_256_q[] = {
0x8CF83642A709A097ULL
};
+/* Primes from RFC 7919 */
+static const BN_ULONG ffdhe2048_p[] = {
+ 0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL,
+ 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+ 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+ 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+ 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+ 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+ 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+ 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+ 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+ 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+ 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+ 0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL,
+ 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+ 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+ 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+ 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+ 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+ 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+ 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+ 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+ 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+ 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+ 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+ 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+ 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+ 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+ 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+ 0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL,
+ 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL,
+ 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL,
+ 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL,
+ 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL,
+ 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL,
+ 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL,
+ 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL,
+ 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL,
+ 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL,
+ 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL,
+ 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL,
+ 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL,
+ 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL,
+ 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL,
+ 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL,
+ 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL,
+ 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL,
+ 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL,
+ 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL,
+ 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL,
+ 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+ 0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL,
+ 0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL,
+ 0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL,
+ 0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL,
+ 0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL,
+ 0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL,
+ 0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL,
+ 0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL,
+ 0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL,
+ 0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL,
+ 0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL,
+ 0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL,
+ 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL,
+ 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL,
+ 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL,
+ 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL,
+ 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL,
+ 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+ 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+ 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+ 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+ 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+ 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+ 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+ 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+ 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+ 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+ 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+ 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+ 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+ 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+ 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+ 0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL,
+ 0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL,
+ 0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL,
+ 0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL,
+ 0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL,
+ 0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL,
+ 0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL,
+ 0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL,
+ 0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL,
+ 0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL,
+ 0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL,
+ 0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL,
+ 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL,
+ 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL,
+ 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL,
+ 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL,
+ 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL,
+ 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL,
+ 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL,
+ 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL,
+ 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL,
+ 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL,
+ 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL,
+ 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL,
+ 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL,
+ 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL,
+ 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL,
+ 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL,
+ 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL,
+ 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL,
+ 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL,
+ 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL,
+ 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL,
+ 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+ 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+ 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+ 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+ 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+ 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+ 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+ 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+ 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+ 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
# elif BN_BITS2 == 32
static const BN_ULONG dh1024_160_p[] = {
@@ -194,6 +334,147 @@ static const BN_ULONG dh2048_256_q[] = {
0xA709A097, 0x8CF83642
};
+/* Primes from RFC 7919 */
+
+static const BN_ULONG ffdhe2048_p[] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+ 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+ 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+ 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+ 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+ 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+ 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+ 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+ 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+ 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+ 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+ 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+ 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+ 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+ 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+ 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+ 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+ 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+ 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+ 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+ 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+ 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+ 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+ 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+ 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+ 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851,
+ 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9,
+ 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9,
+ 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886,
+ 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42,
+ 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B,
+ 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42,
+ 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB,
+ 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93,
+ 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26,
+ 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B,
+ 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183,
+ 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232,
+ 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1,
+ 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3,
+ 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182,
+ 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA,
+ 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555,
+ 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202,
+ 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641,
+ 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458,
+ 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D,
+ 0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9,
+ 0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235,
+ 0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374,
+ 0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1,
+ 0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B,
+ 0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF,
+ 0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7,
+ 0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657,
+ 0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432,
+ 0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6,
+ 0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A,
+ 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D,
+ 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A,
+ 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5,
+ 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4,
+ 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+ 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+ 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+ 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+ 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+ 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+ 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+ 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+ 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+ 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+ 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+ 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+ 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+ 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+ 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+ 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94,
+ 0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5,
+ 0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB,
+ 0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A,
+ 0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD,
+ 0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C,
+ 0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C,
+ 0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7,
+ 0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74,
+ 0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69,
+ 0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C,
+ 0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526,
+ 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810,
+ 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D,
+ 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B,
+ 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023,
+ 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA,
+ 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092,
+ 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB,
+ 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33,
+ 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442,
+ 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E,
+ 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD,
+ 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3,
+ 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01,
+ 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D,
+ 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3,
+ 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B,
+ 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5,
+ 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452,
+ 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA,
+ 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE,
+ 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+ 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+ 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+ 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+ 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+ 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+ 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+ 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+ 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+ 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+ 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
# else
# error "unsupported BN_BITS2"
# endif
@@ -206,6 +487,10 @@ static const BN_ULONG dh2048_256_q[] = {
OSSL_NELEM(x),\
0, BN_FLG_STATIC_DATA };
+static const BN_ULONG value_2 = 2;
+
+const BIGNUM _bignum_const_2 =
+ { (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA };
make_dh_bn(dh1024_160_p)
make_dh_bn(dh1024_160_g)
@@ -217,4 +502,11 @@ make_dh_bn(dh2048_256_p)
make_dh_bn(dh2048_256_g)
make_dh_bn(dh2048_256_q)
+make_dh_bn(ffdhe2048_p)
+make_dh_bn(ffdhe3072_p)
+make_dh_bn(ffdhe4096_p)
+make_dh_bn(ffdhe6144_p)
+make_dh_bn(ffdhe8192_p)
+
+
#endif
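The ffdhe*_p tables above follow the same pattern as the existing dh2048_256_* constants: a static array of BN_ULONG words, least-significant word first, wrapped into a read-only BIGNUM by the make_dh_bn macro. A minimal sketch of what that wrapping amounts to, mirroring the _bignum_const_2 definition above; the names example_p and _bignum_example_p are illustrative and not part of the patch:

static const BN_ULONG example_p[] = {
    0xFFFFFFFF, 0xFFFFFFFF              /* ... remaining words elided ... */
};

const BIGNUM _bignum_example_p = {
    (BN_ULONG *)example_p,              /* d:    word array, least-significant word first */
    OSSL_NELEM(example_p),              /* top:  words in use                             */
    OSSL_NELEM(example_p),              /* dmax: words allocated                          */
    0,                                  /* neg:  the primes are positive                  */
    BN_FLG_STATIC_DATA                  /* never expanded or freed                        */
};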
diff --git a/deps/openssl/openssl/crypto/bn/bn_div.c b/deps/openssl/openssl/crypto/bn/bn_div.c
index 884ff29917..70add10c7d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_div.c
+++ b/deps/openssl/openssl/crypto/bn/bn_div.c
@@ -24,17 +24,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
bn_check_top(d);
if (BN_is_zero(d)) {
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
- return (0);
+ return 0;
}
if (BN_ucmp(m, d) < 0) {
if (rem != NULL) {
if (BN_copy(rem, m) == NULL)
- return (0);
+ return 0;
}
if (dv != NULL)
BN_zero(dv);
- return (1);
+ return 1;
}
BN_CTX_start(ctx);
@@ -81,7 +81,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
ret = 1;
end:
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
#else
@@ -97,8 +97,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
* understand why...);
* - divl doesn't only calculate quotient, but also leaves
* remainder in %edx which we can definitely use here:-)
- *
- * <appro@fy.chalmers.se>
*/
# undef bn_div_words
# define bn_div_words(n0,n1,d0) \
@@ -113,7 +111,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
/*
* Same story here, but it's 128-bit by 64-bit division. Wow!
- * <appro@fy.chalmers.se>
*/
# undef bn_div_words
# define bn_div_words(n0,n1,d0) \
@@ -177,28 +174,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
if (BN_is_zero(divisor)) {
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
- return (0);
+ return 0;
}
if (!no_branch && BN_ucmp(num, divisor) < 0) {
if (rm != NULL) {
if (BN_copy(rm, num) == NULL)
- return (0);
+ return 0;
}
if (dv != NULL)
BN_zero(dv);
- return (1);
+ return 1;
}
BN_CTX_start(ctx);
+ res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
tmp = BN_CTX_get(ctx);
snum = BN_CTX_get(ctx);
sdiv = BN_CTX_get(ctx);
- if (dv == NULL)
- res = BN_CTX_get(ctx);
- else
- res = dv;
- if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
+ if (sdiv == NULL)
goto err;
/* First we normalise the numbers */
@@ -415,10 +409,10 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
if (no_branch)
bn_correct_top(res);
BN_CTX_end(ctx);
- return (1);
+ return 1;
err:
bn_check_top(rm);
BN_CTX_end(ctx);
- return (0);
+ return 0;
}
#endif
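The comments above refer to the inline-assembly bn_div_words helper whose body lies outside these hunks. As a hedged sketch only (not necessarily the exact macro in the tree), the x86_64 flavour divides the 128-bit value n0:n1 by d0 with divq, which leaves the quotient in %rax and the remainder in %rdx, the property the comment points out:

# define bn_div_words(n0, n1, d0)                 \
    ({  BN_ULONG q, r;      /* illustrative names */ \
        asm("divq %4"                             \
            : "=a"(q), "=d"(r)                    \
            : "a"(n1), "d"(n0), "r"(d0)           \
            : "cc");                              \
        q;  })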
diff --git a/deps/openssl/openssl/crypto/bn/bn_err.c b/deps/openssl/openssl/crypto/bn/bn_err.c
index 5fe9db9ede..dd87c152cf 100644
--- a/deps/openssl/openssl/crypto/bn/bn_err.c
+++ b/deps/openssl/openssl/crypto/bn/bn_err.c
@@ -1,6 +1,6 @@
/*
* Generated by util/mkerr.pl DO NOT EDIT
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -8,87 +8,99 @@
* https://www.openssl.org/source/license.html
*/
-#include <stdio.h>
#include <openssl/err.h>
-#include <openssl/bn.h>
+#include <openssl/bnerr.h>
-/* BEGIN ERROR CODES */
#ifndef OPENSSL_NO_ERR
-# define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
-# define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
-
-static ERR_STRING_DATA BN_str_functs[] = {
- {ERR_FUNC(BN_F_BNRAND), "bnrand"},
- {ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
- {ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
- {ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
- {ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
- {ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
- {ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
- {ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
- {ERR_FUNC(BN_F_BN_COMPUTE_WNAF), "bn_compute_wNAF"},
- {ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
- {ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
- {ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
- {ERR_FUNC(BN_F_BN_DIV), "BN_div"},
- {ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
- {ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
- {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "bn_expand_internal"},
- {ERR_FUNC(BN_F_BN_GENCB_NEW), "BN_GENCB_new"},
- {ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"},
- {ERR_FUNC(BN_F_BN_GENERATE_PRIME_EX), "BN_generate_prime_ex"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
- {ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
- {ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"},
- {ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
- {ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
- {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
- {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
- {ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
- {ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
- {ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
- {ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
- {ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
- {ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
- {ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
- {ERR_FUNC(BN_F_BN_NEW), "BN_new"},
- {ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
- {ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
- {ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"},
- {ERR_FUNC(BN_F_BN_SET_WORDS), "bn_set_words"},
- {ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
+static const ERR_STRING_DATA BN_str_functs[] = {
+ {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0),
+ "BN_BLINDING_convert_ex"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0),
+ "BN_BLINDING_create_param"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0),
+ "BN_BLINDING_invert_ex"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0),
+ "BN_generate_dsa_nonce"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0),
+ "BN_generate_prime_ex"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0),
+ "BN_GF2m_mod_solve_quad"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0),
+ "BN_GF2m_mod_solve_quad_arr"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0),
+ "BN_mod_exp_mont_consttime"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0),
+ "BN_mod_exp_mont_word"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0),
+ "BN_mod_inverse_no_branch"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
+ {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
{0, NULL}
};
-static ERR_STRING_DATA BN_str_reasons[] = {
- {ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
- {ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"},
- {ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"},
- {ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"},
- {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"},
- {ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"},
- {ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"},
- {ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
- "expand on static bignum data"},
- {ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"},
- {ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"},
- {ERR_REASON(BN_R_INVALID_RANGE), "invalid range"},
- {ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"},
- {ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"},
- {ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"},
- {ERR_REASON(BN_R_NO_INVERSE), "no inverse"},
- {ERR_REASON(BN_R_NO_SOLUTION), "no solution"},
- {ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE), "private key too large"},
- {ERR_REASON(BN_R_P_IS_NOT_PRIME), "p is not prime"},
- {ERR_REASON(BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
- {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),
- "too many temporary variables"},
+static const ERR_STRING_DATA BN_str_reasons[] = {
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS),
+ "called with even modulus"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
+ "expand on static bignum data"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE),
+ "private key too large"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
+ {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES),
+ "too many temporary variables"},
{0, NULL}
};
@@ -97,10 +109,9 @@ static ERR_STRING_DATA BN_str_reasons[] = {
int ERR_load_BN_strings(void)
{
#ifndef OPENSSL_NO_ERR
-
if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
- ERR_load_strings(0, BN_str_functs);
- ERR_load_strings(0, BN_str_reasons);
+ ERR_load_strings_const(BN_str_functs);
+ ERR_load_strings_const(BN_str_reasons);
}
#endif
return 1;
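The rewritten tables pack the library, function and reason codes directly with ERR_PACK instead of the old ERR_FUNC/ERR_REASON helpers, and are registered once as const data via ERR_load_strings_const. A small illustrative sketch of how an error raised elsewhere in this patch resolves against them (the resolution step is described informally, not quoted from the source):

/* e.g. the raise site in BN_div: */
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
/* ERR_error_string() later matches the packed code against
 *   ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0)        -> "BN_div"
 *   ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO)   -> "div by zero"   */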
diff --git a/deps/openssl/openssl/crypto/bn/bn_exp.c b/deps/openssl/openssl/crypto/bn/bn_exp.c
index a6ad475a0b..c026ffcb33 100644
--- a/deps/openssl/openssl/crypto/bn/bn_exp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_exp.c
@@ -51,10 +51,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
}
BN_CTX_start(ctx);
- if ((r == a) || (r == p))
- rr = BN_CTX_get(ctx);
- else
- rr = r;
+ rr = ((r == a) || (r == p)) ? BN_CTX_get(ctx) : r;
v = BN_CTX_get(ctx);
if (rr == NULL || v == NULL)
goto err;
@@ -86,7 +83,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
err:
BN_CTX_end(ctx);
bn_check_top(r);
- return (ret);
+ return ret;
}
int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
@@ -134,13 +131,6 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
#define RECP_MUL_MOD
#ifdef MONT_MUL_MOD
- /*
- * I have finally been able to take out this pre-condition of the top bit
- * being set. It was caused by an error in BN_div with negatives. There
- * was also another problem when for a^b%m a >= m. eay 07-May-97
- */
- /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
-
if (BN_is_odd(m)) {
# ifdef MONT_EXP_WORD
if (a->top == 1 && !a->neg
@@ -165,7 +155,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
#endif
bn_check_top(r);
- return (ret);
+ return ret;
}
int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
@@ -201,7 +191,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
BN_CTX_start(ctx);
aa = BN_CTX_get(ctx);
val[0] = BN_CTX_get(ctx);
- if (!aa || !val[0])
+ if (val[0] == NULL)
goto err;
BN_RECP_CTX_init(&recp);
@@ -300,7 +290,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
BN_CTX_end(ctx);
BN_RECP_CTX_free(&recp);
bn_check_top(r);
- return (ret);
+ return ret;
}
int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
@@ -326,7 +316,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
if (!BN_is_odd(m)) {
BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
- return (0);
+ return 0;
}
bits = BN_num_bits(p);
if (bits == 0) {
@@ -344,7 +334,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
d = BN_CTX_get(ctx);
r = BN_CTX_get(ctx);
val[0] = BN_CTX_get(ctx);
- if (!d || !r || !val[0])
+ if (val[0] == NULL)
goto err;
/*
@@ -366,11 +356,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
aa = val[0];
} else
aa = a;
- if (BN_is_zero(aa)) {
- BN_zero(rr);
- ret = 1;
- goto err;
- }
if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx))
goto err; /* 1 */
@@ -481,10 +466,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
BN_MONT_CTX_free(mont);
BN_CTX_end(ctx);
bn_check_top(rr);
- return (ret);
+ return ret;
}
-#if defined(SPARC_T4_MONT)
static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
{
BN_ULONG ret = 0;
@@ -503,7 +487,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
return ret & BN_MASK2;
}
-#endif
/*
* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
@@ -610,7 +593,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx,
BN_MONT_CTX *in_mont)
{
- int i, bits, ret = 0, window, wvalue;
+ int i, bits, ret = 0, window, wvalue, wmask, window0;
int top;
BN_MONT_CTX *mont = NULL;
@@ -629,7 +612,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
if (!BN_is_odd(m)) {
BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
- return (0);
+ return 0;
}
top = m->top;
@@ -666,31 +649,33 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
#ifdef RSAZ_ENABLED
- /*
- * If the size of the operands allow it, perform the optimized
- * RSAZ exponentiation. For further information see
- * crypto/bn/rsaz_exp.c and accompanying assembly modules.
- */
- if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
- && rsaz_avx2_eligible()) {
- if (NULL == bn_wexpand(rr, 16))
+ if (!a->neg) {
+ /*
+ * If the size of the operands allow it, perform the optimized
+ * RSAZ exponentiation. For further information see
+ * crypto/bn/rsaz_exp.c and accompanying assembly modules.
+ */
+ if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
+ && rsaz_avx2_eligible()) {
+ if (NULL == bn_wexpand(rr, 16))
+ goto err;
+ RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
+ mont->n0[0]);
+ rr->top = 16;
+ rr->neg = 0;
+ bn_correct_top(rr);
+ ret = 1;
goto err;
- RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
- mont->n0[0]);
- rr->top = 16;
- rr->neg = 0;
- bn_correct_top(rr);
- ret = 1;
- goto err;
- } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
- if (NULL == bn_wexpand(rr, 8))
+ } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
+ if (NULL == bn_wexpand(rr, 8))
+ goto err;
+ RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
+ rr->top = 8;
+ rr->neg = 0;
+ bn_correct_top(rr);
+ ret = 1;
goto err;
- RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
- rr->top = 8;
- rr->neg = 0;
- bn_correct_top(rr);
- ret = 1;
- goto err;
+ }
}
#endif
@@ -763,7 +748,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
/* prepare a^1 in Montgomery domain */
if (a->neg || BN_ucmp(a, m) >= 0) {
- if (!BN_mod(&am, a, m, ctx))
+ if (!BN_nnmod(&am, a, m, ctx))
goto err;
if (!bn_to_mont_fixed_top(&am, &am, mont, ctx))
goto err;
@@ -861,20 +846,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
top /= 2;
bn_flip_t4(np, mont->N.d, top);
- bits--;
- for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits - 1) % 5 + 1;
+ wmask = (1 << window0) - 1;
+ bits -= window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
- while (bits >= 0) {
+ while (bits > 0) {
if (bits < stride)
- stride = bits + 1;
+ stride = bits;
bits -= stride;
- wvalue = bn_get_bits(p, bits + 1);
+ wvalue = bn_get_bits(p, bits);
if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
continue;
@@ -982,32 +974,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
bn_scatter5(tmp.d, top, powerbuf, i);
}
# endif
- bits--;
- for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits - 1) % 5 + 1;
+ wmask = (1 << window0) - 1;
+ bits -= window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
bn_gather5(tmp.d, top, powerbuf, wvalue);
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
- if (top & 7)
- while (bits >= 0) {
- for (wvalue = 0, i = 0; i < 5; i++, bits--)
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
-
+ if (top & 7) {
+ while (bits > 0) {
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
- wvalue);
+ bn_get_bits5(p->d, bits -= 5));
+ }
} else {
- while (bits >= 0) {
- wvalue = bn_get_bits5(p->d, bits - 4);
- bits -= 5;
- bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
+ while (bits > 0) {
+ bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top,
+ bn_get_bits5(p->d, bits -= 5));
}
}
@@ -1049,28 +1045,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
- bits--;
- for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits - 1) % window + 1;
+ wmask = (1 << window0) - 1;
+ bits -= window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
window))
goto err;
+ wmask = (1 << window) - 1;
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
- while (bits >= 0) {
- wvalue = 0; /* The 'value' of the window */
+ while (bits > 0) {
- /* Scan the window, squaring the result as we go */
- for (i = 0; i < window; i++, bits--) {
+ /* Square the result window-size times */
+ for (i = 0; i < window; i++)
if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx))
goto err;
- wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
- }
/*
+ * Get a window's worth of bits from the exponent
+ * This avoids calling BN_is_bit_set for each bit, which
+ * is not only slower but also makes each bit vulnerable to
+ * EM (and likely other) side-channel attacks like One&Done
+ * (for details see "One&Done: A Single-Decryption EM-Based
+ * Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam,
+ * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and
+ * M. Prvulovic, in USENIX Security'18)
+ */
+ bits -= window;
+ wvalue = bn_get_bits(p, bits) & wmask;
+ /*
* Fetch the appropriate pre-computed value from the pre-buf
*/
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
@@ -1108,7 +1121,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
OPENSSL_free(powerbufFree);
}
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
@@ -1118,7 +1131,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
int b, bits, ret = 0;
int r_is_one;
BN_ULONG w, next_w;
- BIGNUM *d, *r, *t;
+ BIGNUM *r, *t;
BIGNUM *swap_tmp;
#define BN_MOD_MUL_WORD(r, w, m) \
(BN_mul_word(r, (w)) && \
@@ -1149,7 +1162,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
if (!BN_is_odd(m)) {
BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
- return (0);
+ return 0;
}
if (m->top == 1)
a %= m->d[0]; /* make sure that 'a' is reduced */
@@ -1172,10 +1185,9 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
}
BN_CTX_start(ctx);
- d = BN_CTX_get(ctx);
r = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
- if (d == NULL || r == NULL || t == NULL)
+ if (t == NULL)
goto err;
if (in_mont != NULL)
@@ -1256,7 +1268,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
BN_MONT_CTX_free(mont);
BN_CTX_end(ctx);
bn_check_top(rr);
- return (ret);
+ return ret;
}
/* The old fallback, simple version :-) */
@@ -1292,7 +1304,7 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
BN_CTX_start(ctx);
d = BN_CTX_get(ctx);
val[0] = BN_CTX_get(ctx);
- if (!d || !val[0])
+ if (val[0] == NULL)
goto err;
if (!BN_nnmod(val[0], a, m, ctx))
@@ -1377,5 +1389,5 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
err:
BN_CTX_end(ctx);
bn_check_top(r);
- return (ret);
+ return ret;
}
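The recurring change in this file is that every windowed loop now pulls a whole window of exponent bits with a single bn_get_bits()/bn_get_bits5() call instead of assembling it bit by bit with BN_is_bit_set(). A worked example of the leading-window arithmetic, with illustrative numbers rather than values taken from the patch:

/* bits = 13, window = 5:
 *   window0 = (13 - 1) % 5 + 1 = 3      leading partial window
 *   wmask   = (1 << 3) - 1    = 0x7
 *   bits    = 13 - 3          = 10      now an exact multiple of 5
 * The main loop then consumes 5 bits per pass (10 -> 5 -> 0), each time
 * with one bn_get_bits(p, bits) & wmask read, so the per-bit access
 * pattern exploited by the One&Done attack never occurs.             */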
diff --git a/deps/openssl/openssl/crypto/bn/bn_exp2.c b/deps/openssl/openssl/crypto/bn/bn_exp2.c
index 5141c21f6d..082c9286a0 100644
--- a/deps/openssl/openssl/crypto/bn/bn_exp2.c
+++ b/deps/openssl/openssl/crypto/bn/bn_exp2.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -34,7 +34,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
if (!(m->d[0] & 1)) {
BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
- return (0);
+ return 0;
}
bits1 = BN_num_bits(p1);
bits2 = BN_num_bits(p2);
@@ -50,7 +50,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
r = BN_CTX_get(ctx);
val1[0] = BN_CTX_get(ctx);
val2[0] = BN_CTX_get(ctx);
- if (!d || !r || !val1[0] || !val2[0])
+ if (val2[0] == NULL)
goto err;
if (in_mont != NULL)
@@ -197,5 +197,5 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
BN_MONT_CTX_free(mont);
BN_CTX_end(ctx);
bn_check_top(rr);
- return (ret);
+ return ret;
}
diff --git a/deps/openssl/openssl/crypto/bn/bn_gcd.c b/deps/openssl/openssl/crypto/bn/bn_gcd.c
index bed231c8fa..0091ea4e08 100644
--- a/deps/openssl/openssl/crypto/bn/bn_gcd.c
+++ b/deps/openssl/openssl/crypto/bn/bn_gcd.c
@@ -23,7 +23,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
BN_CTX_start(ctx);
a = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
- if (a == NULL || b == NULL)
+ if (b == NULL)
goto err;
if (BN_copy(a, in_a) == NULL)
@@ -48,7 +48,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
err:
BN_CTX_end(ctx);
bn_check_top(r);
- return (ret);
+ return ret;
}
static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
@@ -111,9 +111,9 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
goto err;
}
bn_check_top(a);
- return (a);
+ return a;
err:
- return (NULL);
+ return NULL;
}
/* solves ax == 1 (mod n) */
@@ -448,7 +448,7 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in,
BN_free(R);
BN_CTX_end(ctx);
bn_check_top(ret);
- return (ret);
+ return ret;
}
/*
@@ -619,5 +619,5 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
BN_free(R);
BN_CTX_end(ctx);
bn_check_top(ret);
- return (ret);
+ return ret;
}
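The only substantive change here is the NULL-check idiom also used in bn_div.c, bn_exp.c and bn_gf2m.c: once a BN_CTX_get() call fails inside a BN_CTX_start()/BN_CTX_end() scope, every later BN_CTX_get() in that scope also returns NULL, so testing only the last pointer obtained covers the whole batch. A minimal sketch of the idiom:

BN_CTX_start(ctx);
a = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);        /* returns NULL if the call for 'a' failed */
if (b == NULL)              /* one check is enough for both            */
    goto err;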
diff --git a/deps/openssl/openssl/crypto/bn/bn_gf2m.c b/deps/openssl/openssl/crypto/bn/bn_gf2m.c
index d80f3ec940..34d8b69c1e 100644
--- a/deps/openssl/openssl/crypto/bn/bn_gf2m.c
+++ b/deps/openssl/openssl/crypto/bn/bn_gf2m.c
@@ -1,5 +1,6 @@
/*
* Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -7,17 +8,6 @@
* https://www.openssl.org/source/license.html
*/
-/* ====================================================================
- * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
- *
- * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
- * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
- * to the OpenSSL project.
- *
- * The ECC Code is licensed pursuant to the OpenSSL open source
- * license provided below.
- */
-
#include <assert.h>
#include <limits.h>
#include <stdio.h>
@@ -559,7 +549,8 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
* Hernandez, J.L., and Menezes, A. "Software Implementation of Elliptic
* Curve Cryptography Over Binary Fields".
*/
-int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
+static int BN_GF2m_mod_inv_vartime(BIGNUM *r, const BIGNUM *a,
+ const BIGNUM *p, BN_CTX *ctx)
{
BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
int ret = 0;
@@ -569,13 +560,11 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
BN_CTX_start(ctx);
- if ((b = BN_CTX_get(ctx)) == NULL)
- goto err;
- if ((c = BN_CTX_get(ctx)) == NULL)
- goto err;
- if ((u = BN_CTX_get(ctx)) == NULL)
- goto err;
- if ((v = BN_CTX_get(ctx)) == NULL)
+ b = BN_CTX_get(ctx);
+ c = BN_CTX_get(ctx);
+ u = BN_CTX_get(ctx);
+ v = BN_CTX_get(ctx);
+ if (v == NULL)
goto err;
if (!BN_GF2m_mod(u, a, p))
@@ -727,6 +716,46 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
return ret;
}
+/*-
+ * Wrapper for BN_GF2m_mod_inv_vartime that blinds the input before calling.
+ * This is not constant time.
+ * But it does eliminate first order deduction on the input.
+ */
+int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
+{
+ BIGNUM *b = NULL;
+ int ret = 0;
+
+ BN_CTX_start(ctx);
+ if ((b = BN_CTX_get(ctx)) == NULL)
+ goto err;
+
+ /* generate blinding value */
+ do {
+ if (!BN_priv_rand(b, BN_num_bits(p) - 1,
+ BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+ goto err;
+ } while (BN_is_zero(b));
+
+ /* r := a * b */
+ if (!BN_GF2m_mod_mul(r, a, b, p, ctx))
+ goto err;
+
+ /* r := 1/(a * b) */
+ if (!BN_GF2m_mod_inv_vartime(r, r, p, ctx))
+ goto err;
+
+ /* r := b/(a * b) = 1/a */
+ if (!BN_GF2m_mod_mul(r, r, b, p, ctx))
+ goto err;
+
+ ret = 1;
+
+ err:
+ BN_CTX_end(ctx);
+ return ret;
+}
+
/*
* Invert xx, reduce modulo p, and store the result in r. r could be xx.
* This function calls down to the BN_GF2m_mod_inv implementation; this
@@ -754,7 +783,6 @@ int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[],
return ret;
}
-# ifndef OPENSSL_SUN_GF2M_DIV
/*
* Divide y by x, reduce modulo p, and store the result in r. r could be x
* or y, x could equal y.
@@ -785,94 +813,6 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
BN_CTX_end(ctx);
return ret;
}
-# else
-/*
- * Divide y by x, reduce modulo p, and store the result in r. r could be x
- * or y, x could equal y. Uses algorithm Modular_Division_GF(2^m) from
- * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to the
- * Great Divide".
- */
-int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
- const BIGNUM *p, BN_CTX *ctx)
-{
- BIGNUM *a, *b, *u, *v;
- int ret = 0;
-
- bn_check_top(y);
- bn_check_top(x);
- bn_check_top(p);
-
- BN_CTX_start(ctx);
-
- a = BN_CTX_get(ctx);
- b = BN_CTX_get(ctx);
- u = BN_CTX_get(ctx);
- v = BN_CTX_get(ctx);
- if (v == NULL)
- goto err;
-
- /* reduce x and y mod p */
- if (!BN_GF2m_mod(u, y, p))
- goto err;
- if (!BN_GF2m_mod(a, x, p))
- goto err;
- if (!BN_copy(b, p))
- goto err;
-
- while (!BN_is_odd(a)) {
- if (!BN_rshift1(a, a))
- goto err;
- if (BN_is_odd(u))
- if (!BN_GF2m_add(u, u, p))
- goto err;
- if (!BN_rshift1(u, u))
- goto err;
- }
-
- do {
- if (BN_GF2m_cmp(b, a) > 0) {
- if (!BN_GF2m_add(b, b, a))
- goto err;
- if (!BN_GF2m_add(v, v, u))
- goto err;
- do {
- if (!BN_rshift1(b, b))
- goto err;
- if (BN_is_odd(v))
- if (!BN_GF2m_add(v, v, p))
- goto err;
- if (!BN_rshift1(v, v))
- goto err;
- } while (!BN_is_odd(b));
- } else if (BN_abs_is_word(a, 1))
- break;
- else {
- if (!BN_GF2m_add(a, a, b))
- goto err;
- if (!BN_GF2m_add(u, u, v))
- goto err;
- do {
- if (!BN_rshift1(a, a))
- goto err;
- if (BN_is_odd(u))
- if (!BN_GF2m_add(u, u, p))
- goto err;
- if (!BN_rshift1(u, u))
- goto err;
- } while (!BN_is_odd(a));
- }
- } while (1);
-
- if (!BN_copy(r, u))
- goto err;
- bn_check_top(r);
- ret = 1;
-
- err:
- BN_CTX_end(ctx);
- return ret;
-}
-# endif
/*
* Divide yy by xx, reduce modulo p, and store the result in r. r could be xx
@@ -918,7 +858,7 @@ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
bn_check_top(b);
if (BN_is_zero(b))
- return (BN_one(r));
+ return BN_one(r);
if (BN_abs_is_word(b, 1))
return (BN_copy(r, a) != NULL);
@@ -1091,7 +1031,7 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[],
if (tmp == NULL)
goto err;
do {
- if (!BN_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+ if (!BN_priv_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
goto err;
if (!BN_GF2m_mod_arr(rho, rho, p))
goto err;
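A short note on why the blinding wrapper added above (BN_GF2m_mod_inv calling into BN_GF2m_mod_inv_vartime) is correct: with a random non-zero b,

/*   r = a * b                (mod p)
 *   r = (a * b)^(-1)         (mod p)
 *   r = (a * b)^(-1) * b     (mod p)  ==  a^(-1)
 * so the variable-time Euclidean inversion only ever operates on the
 * product a*b, never directly on the caller's secret value a.        */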
diff --git a/deps/openssl/openssl/crypto/bn/bn_intern.c b/deps/openssl/openssl/crypto/bn/bn_intern.c
index 7b25927f9b..46bc97575d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_intern.c
+++ b/deps/openssl/openssl/crypto/bn/bn_intern.c
@@ -143,11 +143,6 @@ int bn_get_top(const BIGNUM *a)
return a->top;
}
-void bn_set_top(BIGNUM *a, int top)
-{
- a->top = top;
-}
-
int bn_get_dmax(const BIGNUM *a)
{
return a->dmax;
@@ -202,13 +197,3 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words)
bn_correct_top(a);
return 1;
}
-
-size_t bn_sizeof_BIGNUM(void)
-{
- return sizeof(BIGNUM);
-}
-
-BIGNUM *bn_array_el(BIGNUM *base, int el)
-{
- return &base[el];
-}
diff --git a/deps/openssl/openssl/crypto/bn/bn_lcl.h b/deps/openssl/openssl/crypto/bn/bn_lcl.h
index 4d9808f5b8..8a36db2e8b 100644
--- a/deps/openssl/openssl/crypto/bn/bn_lcl.h
+++ b/deps/openssl/openssl/crypto/bn/bn_lcl.h
@@ -23,10 +23,6 @@
# include "internal/bn_int.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-
/*
* These preprocessor symbols control various aspects of the bignum headers
* and library code. They're not defined by any "normal" configuration, as
@@ -156,11 +152,6 @@ extern "C" {
*/
# define BN_FLG_FIXED_TOP 0x10000
# ifdef BN_DEBUG_RAND
-/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */
-# ifndef RAND_bytes
-int RAND_bytes(unsigned char *buf, int num);
-# define BN_DEBUG_TRIX
-# endif
# define bn_pollute(a) \
do { \
const BIGNUM *_bnum1 = (a); \
@@ -176,9 +167,6 @@ int RAND_bytes(unsigned char *buf, int num);
sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \
} \
} while(0)
-# ifdef BN_DEBUG_TRIX
-# undef RAND_bytes
-# endif
# else
# define bn_pollute(a)
# endif
@@ -187,9 +175,9 @@ int RAND_bytes(unsigned char *buf, int num);
const BIGNUM *_bnum2 = (a); \
if (_bnum2 != NULL) { \
int _top = _bnum2->top; \
- OPENSSL_assert((_top == 0 && !_bnum2->neg) || \
- (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
- || _bnum2->d[_top - 1] != 0))); \
+ (void)ossl_assert((_top == 0 && !_bnum2->neg) || \
+ (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
+ || _bnum2->d[_top - 1] != 0))); \
bn_pollute(_bnum2); \
} \
} while(0)
@@ -200,8 +188,8 @@ int RAND_bytes(unsigned char *buf, int num);
# define bn_wcheck_size(bn, words) \
do { \
const BIGNUM *_bnum2 = (bn); \
- OPENSSL_assert((words) <= (_bnum2)->dmax && \
- (words) >= (_bnum2)->top); \
+ assert((words) <= (_bnum2)->dmax && \
+ (words) >= (_bnum2)->top); \
/* avoid unused variable warning with NDEBUG */ \
(void)(_bnum2); \
} while(0)
@@ -370,59 +358,58 @@ struct bn_gencb_st {
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
* BN_UMULT_HIGH section.
- *
- * No, I'm not trying to overwhelm you when stating that the
- * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
- * you to be impressed when I say that if the compiler doesn't
- * support 2*N integer type, then you have to replace every N*N
- * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
- * and additions which unavoidably results in severe performance
- * penalties. Of course provided that the hardware is capable of
- * producing 2*N result... That's when you normally start
- * considering assembler implementation. However! It should be
- * pointed out that some CPUs (most notably Alpha, PowerPC and
- * upcoming IA-64 family:-) provide *separate* instruction
- * calculating the upper half of the product placing the result
- * into a general purpose register. Now *if* the compiler supports
- * inline assembler, then it's not impossible to implement the
- * "bignum" routines (and have the compiler optimize 'em)
- * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
- * macro is about:-)
- *
- * <appro@fy.chalmers.se>
+ * If the compiler doesn't support 2*N integer type, then you have to
+ * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
+ * shifts and additions which unavoidably results in severe performance
+ * penalties. Of course provided that the hardware is capable of producing
+ * 2*N result... That's when you normally start considering assembler
+ * implementation. However! It should be pointed out that some CPUs (e.g.,
+ * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
+ * the upper half of the product placing the result into a general
+ * purpose register. Now *if* the compiler supports inline assembler,
+ * then it's not impossible to implement the "bignum" routines (and have
+ * the compiler optimize 'em) exhibiting "native" performance in C. That's
+ * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
+ * support 2*64 integer type, which is also used here.
*/
-# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
+ (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
+# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
+# define BN_UMULT_LOHI(low,high,a,b) ({ \
+ __uint128_t ret=(__uint128_t)(a)*(b); \
+ (high)=ret>>64; (low)=ret; })
+# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__DECC)
# include <c_asm.h>
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
# elif defined(__GNUC__) && __GNUC__>=2
-# define BN_UMULT_HIGH(a,b) ({ \
+# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %1,%2,%0" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
- ret; })
+ ret; })
# endif /* compiler */
-# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
+# elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__) && __GNUC__>=2
-# define BN_UMULT_HIGH(a,b) ({ \
+# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("mulhdu %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
- ret; })
+ ret; })
# endif /* compiler */
# elif (defined(__x86_64) || defined(__x86_64__)) && \
(defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__GNUC__) && __GNUC__>=2
-# define BN_UMULT_HIGH(a,b) ({ \
+# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret,discard; \
asm ("mulq %3" \
: "=a"(discard),"=d"(ret) \
: "a"(a), "g"(b) \
: "cc"); \
- ret; })
-# define BN_UMULT_LOHI(low,high,a,b) \
+ ret; })
+# define BN_UMULT_LOHI(low,high,a,b) \
asm ("mulq %3" \
: "=a"(low),"=d"(high) \
: "a"(a),"g"(b) \
@@ -439,43 +426,29 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
# endif
# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
# if defined(__GNUC__) && __GNUC__>=2
-# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
- /* "h" constraint is not an option on R6 and was removed in 4.4 */
-# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
-# define BN_UMULT_LOHI(low,high,a,b) ({ \
- __uint128_t ret=(__uint128_t)(a)*(b); \
- (high)=ret>>64; (low)=ret; })
-# else
-# define BN_UMULT_HIGH(a,b) ({ \
+# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("dmultu %1,%2" \
: "=h"(ret) \
: "r"(a), "r"(b) : "l"); \
ret; })
-# define BN_UMULT_LOHI(low,high,a,b)\
+# define BN_UMULT_LOHI(low,high,a,b) \
asm ("dmultu %2,%3" \
: "=l"(low),"=h"(high) \
: "r"(a), "r"(b));
-# endif
# endif
# elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__) && __GNUC__>=2
-# define BN_UMULT_HIGH(a,b) ({ \
+# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
- ret; })
+ ret; })
# endif
# endif /* cpu */
# endif /* OPENSSL_NO_ASM */
-/*************************************************************
- * Using the long long type
- */
-# define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
-# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
-
# ifdef BN_DEBUG_RAND
# define bn_clear_top2max(a) \
{ \
@@ -489,6 +462,12 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
# endif
# ifdef BN_LLONG
+/*******************************************************************
+ * Using the long long type, has to be twice as wide as BN_ULONG...
+ */
+# define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
+# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
+
# define mul_add(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (r) + (c); \
@@ -666,10 +645,6 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
BN_ULONG *t);
-void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
- BN_ULONG *t);
-BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
- int cl, int dl);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
@@ -681,8 +656,6 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in,
int bn_probable_prime_dh(BIGNUM *rnd, int bits,
const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
-int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx);
-int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx);
static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
{
@@ -695,8 +668,4 @@ static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2);
}
-#ifdef __cplusplus
-}
-#endif
-
#endif
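The new generic branch above prefers a compiler-provided __uint128_t over per-CPU inline assembly wherever a 128-bit integer type exists. A minimal usage sketch, assuming a caller compiled on a configuration where these macros are defined (the operand values are illustrative, not from the patch):

BN_ULONG a = (BN_ULONG)0x0123456789abcdefULL;
BN_ULONG b = (BN_ULONG)0xfedcba9876543210ULL;
BN_ULONG hi, lo;

# if defined(BN_UMULT_LOHI)
BN_UMULT_LOHI(lo, hi, a, b);        /* both halves in one widening multiply  */
# else
lo = a * b;                         /* low half, truncating                  */
hi = BN_UMULT_HIGH(a, b);           /* high half via the path defined above  */
# endif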
diff --git a/deps/openssl/openssl/crypto/bn/bn_lib.c b/deps/openssl/openssl/crypto/bn/bn_lib.c
index 3f3c7bbb2f..80f910c807 100644
--- a/deps/openssl/openssl/crypto/bn/bn_lib.c
+++ b/deps/openssl/openssl/crypto/bn/bn_lib.c
@@ -66,15 +66,15 @@ void BN_set_params(int mult, int high, int low, int mont)
int BN_get_params(int which)
{
if (which == 0)
- return (bn_limit_bits);
+ return bn_limit_bits;
else if (which == 1)
- return (bn_limit_bits_high);
+ return bn_limit_bits_high;
else if (which == 2)
- return (bn_limit_bits_low);
+ return bn_limit_bits_low;
else if (which == 3)
- return (bn_limit_bits_mont);
+ return bn_limit_bits_mont;
else
- return (0);
+ return 0;
}
#endif
@@ -84,7 +84,7 @@ const BIGNUM *BN_value_one(void)
static const BIGNUM const_one =
{ (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA };
- return (&const_one);
+ return &const_one;
}
int BN_num_bits_word(BN_ULONG l)
@@ -153,37 +153,26 @@ static void bn_free_d(BIGNUM *a)
void BN_clear_free(BIGNUM *a)
{
- int i;
-
if (a == NULL)
return;
- bn_check_top(a);
- if (a->d != NULL) {
+ if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0]));
- if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
- bn_free_d(a);
+ bn_free_d(a);
}
- i = BN_get_flags(a, BN_FLG_MALLOCED);
- OPENSSL_cleanse(a, sizeof(*a));
- if (i)
+ if (BN_get_flags(a, BN_FLG_MALLOCED)) {
+ OPENSSL_cleanse(a, sizeof(*a));
OPENSSL_free(a);
+ }
}
void BN_free(BIGNUM *a)
{
if (a == NULL)
return;
- bn_check_top(a);
if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
bn_free_d(a);
if (a->flags & BN_FLG_MALLOCED)
OPENSSL_free(a);
- else {
-#if OPENSSL_API_COMPAT < 0x00908000L
- a->flags |= BN_FLG_FREE;
-#endif
- a->d = NULL;
- }
}
void bn_init(BIGNUM *a)
@@ -200,11 +189,11 @@ BIGNUM *BN_new(void)
if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE);
- return (NULL);
+ return NULL;
}
ret->flags = BN_FLG_MALLOCED;
bn_check_top(ret);
- return (ret);
+ return ret;
}
BIGNUM *BN_secure_new(void)
@@ -212,16 +201,14 @@ BIGNUM *BN_new(void)
BIGNUM *ret = BN_new();
if (ret != NULL)
ret->flags |= BN_FLG_SECURE;
- return (ret);
+ return ret;
}
/* This is used by bn_expand2() */
/* The caller MUST check that words > b->dmax before calling this */
static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
{
- BN_ULONG *A, *a = NULL;
- const BN_ULONG *B;
- int i;
+ BN_ULONG *a = NULL;
if (words > (INT_MAX / (4 * BN_BITS2))) {
BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG);
@@ -229,62 +216,22 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
}
if (BN_get_flags(b, BN_FLG_STATIC_DATA)) {
BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
- return (NULL);
+ return NULL;
}
if (BN_get_flags(b, BN_FLG_SECURE))
- a = A = OPENSSL_secure_zalloc(words * sizeof(*a));
+ a = OPENSSL_secure_zalloc(words * sizeof(*a));
else
- a = A = OPENSSL_zalloc(words * sizeof(*a));
- if (A == NULL) {
+ a = OPENSSL_zalloc(words * sizeof(*a));
+ if (a == NULL) {
BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE);
- return (NULL);
+ return NULL;
}
-#if 1
- B = b->d;
- /* Check if the previous number needs to be copied */
- if (B != NULL) {
- for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) {
- /*
- * The fact that the loop is unrolled
- * 4-wise is a tribute to Intel. It's
- * the one that doesn't have enough
- * registers to accommodate more data.
- * I'd unroll it 8-wise otherwise:-)
- *
- * <appro@fy.chalmers.se>
- */
- BN_ULONG a0, a1, a2, a3;
- a0 = B[0];
- a1 = B[1];
- a2 = B[2];
- a3 = B[3];
- A[0] = a0;
- A[1] = a1;
- A[2] = a2;
- A[3] = a3;
- }
- switch (b->top & 3) {
- case 3:
- A[2] = B[2];
- /* fall thru */
- case 2:
- A[1] = B[1];
- /* fall thru */
- case 1:
- A[0] = B[0];
- /* fall thru */
- case 0:
- /* Without the "case 0" some old optimizers got this wrong. */
- ;
- }
- }
-#else
- memset(A, 0, sizeof(*A) * words);
- memcpy(A, b->d, sizeof(b->d[0]) * b->top);
-#endif
+ assert(b->top <= words);
+ if (b->top > 0)
+ memcpy(a, b->d, sizeof(*a) * b->top);
- return (a);
+ return a;
}
/*
@@ -333,53 +280,21 @@ BIGNUM *BN_dup(const BIGNUM *a)
BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
{
- int i;
- BN_ULONG *A;
- const BN_ULONG *B;
-
bn_check_top(b);
if (a == b)
- return (a);
+ return a;
if (bn_wexpand(a, b->top) == NULL)
- return (NULL);
-
-#if 1
- A = a->d;
- B = b->d;
- for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) {
- BN_ULONG a0, a1, a2, a3;
- a0 = B[0];
- a1 = B[1];
- a2 = B[2];
- a3 = B[3];
- A[0] = a0;
- A[1] = a1;
- A[2] = a2;
- A[3] = a3;
- }
- /* ultrix cc workaround, see comments in bn_expand_internal */
- switch (b->top & 3) {
- case 3:
- A[2] = B[2];
- /* fall thru */
- case 2:
- A[1] = B[1];
- /* fall thru */
- case 1:
- A[0] = B[0];
- /* fall thru */
- case 0:;
- }
-#else
- memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
-#endif
+ return NULL;
+
+ if (b->top > 0)
+ memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
a->neg = b->neg;
a->top = b->top;
a->flags |= b->flags & BN_FLG_FIXED_TOP;
bn_check_top(a);
- return (a);
+ return a;
}
#define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \
@@ -445,13 +360,13 @@ int BN_set_word(BIGNUM *a, BN_ULONG w)
{
bn_check_top(a);
if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL)
- return (0);
+ return 0;
a->neg = 0;
a->d[0] = w;
a->top = (w ? 1 : 0);
a->flags &= ~BN_FLG_FIXED_TOP;
bn_check_top(a);
- return (1);
+ return 1;
}
BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
@@ -464,7 +379,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
if (ret == NULL)
ret = bn = BN_new();
if (ret == NULL)
- return (NULL);
+ return NULL;
bn_check_top(ret);
/* Skip leading zero's. */
for ( ; len > 0 && *s == 0; s++, len--)
@@ -472,7 +387,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
n = len;
if (n == 0) {
ret->top = 0;
- return (ret);
+ return ret;
}
i = ((n - 1) / BN_BYTES) + 1;
m = ((n - 1) % (BN_BYTES));
@@ -496,7 +411,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
* bit set (-ve number)
*/
bn_correct_top(ret);
- return (ret);
+ return ret;
}
/* ignore negative */
@@ -564,7 +479,7 @@ BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
if (ret == NULL)
ret = bn = BN_new();
if (ret == NULL)
- return (NULL);
+ return NULL;
bn_check_top(ret);
s += len;
/* Skip trailing zeroes. */
@@ -631,7 +546,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
i = a->top - b->top;
if (i != 0)
- return (i);
+ return i;
ap = a->d;
bp = b->d;
for (i = a->top - 1; i >= 0; i--) {
@@ -640,7 +555,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
if (t1 != t2)
return ((t1 > t2) ? 1 : -1);
}
- return (0);
+ return 0;
}
int BN_cmp(const BIGNUM *a, const BIGNUM *b)
@@ -651,11 +566,11 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
if ((a == NULL) || (b == NULL)) {
if (a != NULL)
- return (-1);
+ return -1;
else if (b != NULL)
- return (1);
+ return 1;
else
- return (0);
+ return 0;
}
bn_check_top(a);
@@ -663,9 +578,9 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
if (a->neg != b->neg) {
if (a->neg)
- return (-1);
+ return -1;
else
- return (1);
+ return 1;
}
if (a->neg == 0) {
gt = 1;
@@ -676,18 +591,18 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
}
if (a->top > b->top)
- return (gt);
+ return gt;
if (a->top < b->top)
- return (lt);
+ return lt;
for (i = a->top - 1; i >= 0; i--) {
t1 = a->d[i];
t2 = b->d[i];
if (t1 > t2)
- return (gt);
+ return gt;
if (t1 < t2)
- return (lt);
+ return lt;
}
- return (0);
+ return 0;
}
int BN_set_bit(BIGNUM *a, int n)
@@ -701,7 +616,7 @@ int BN_set_bit(BIGNUM *a, int n)
j = n % BN_BITS2;
if (a->top <= i) {
if (bn_wexpand(a, i + 1) == NULL)
- return (0);
+ return 0;
for (k = a->top; k < i + 1; k++)
a->d[k] = 0;
a->top = i + 1;
@@ -710,7 +625,7 @@ int BN_set_bit(BIGNUM *a, int n)
a->d[i] |= (((BN_ULONG)1) << j);
bn_check_top(a);
- return (1);
+ return 1;
}
int BN_clear_bit(BIGNUM *a, int n)
@@ -724,11 +639,11 @@ int BN_clear_bit(BIGNUM *a, int n)
i = n / BN_BITS2;
j = n % BN_BITS2;
if (a->top <= i)
- return (0);
+ return 0;
a->d[i] &= (~(((BN_ULONG)1) << j));
bn_correct_top(a);
- return (1);
+ return 1;
}
int BN_is_bit_set(const BIGNUM *a, int n)
@@ -764,7 +679,7 @@ int BN_mask_bits(BIGNUM *a, int n)
a->d[w] &= ~(BN_MASK2 << b);
}
bn_correct_top(a);
- return (1);
+ return 1;
}
void BN_set_negative(BIGNUM *a, int b)
@@ -790,7 +705,7 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
if (aa != bb)
return ((aa > bb) ? 1 : -1);
}
- return (0);
+ return 0;
}
/*
@@ -1000,7 +915,7 @@ BN_GENCB *BN_GENCB_new(void)
if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE);
- return (NULL);
+ return NULL;
}
return ret;
diff --git a/deps/openssl/openssl/crypto/bn/bn_mod.c b/deps/openssl/openssl/crypto/bn/bn_mod.c
index 2e98035bd8..712fc8ac14 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mod.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mod.c
@@ -216,7 +216,7 @@ int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
ret = 1;
err:
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
diff --git a/deps/openssl/openssl/crypto/bn/bn_mont.c b/deps/openssl/openssl/crypto/bn/bn_mont.c
index 41214334b8..393d27c392 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mont.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mont.c
@@ -44,12 +44,12 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
if (num > 1 && a->top == num && b->top == num) {
if (bn_wexpand(r, num) == NULL)
- return (0);
+ return 0;
if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
r->neg = a->neg ^ b->neg;
r->top = num;
r->flags |= BN_FLG_FIXED_TOP;
- return (1);
+ return 1;
}
}
#endif
@@ -81,7 +81,7 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
ret = 1;
err:
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
#ifdef MONT_WORD
@@ -96,17 +96,18 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
nl = n->top;
if (nl == 0) {
ret->top = 0;
- return (1);
+ return 1;
}
max = (2 * nl); /* carry is stored separately */
if (bn_wexpand(r, max) == NULL)
- return (0);
+ return 0;
r->neg ^= n->neg;
np = n->d;
rp = r->d;
+ /* clear the top words of T */
for (rtop = r->top, i = 0; i < max; i++) {
v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
rp[i] &= v;
@@ -130,7 +131,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
}
if (bn_wexpand(ret, nl) == NULL)
- return (0);
+ return 0;
ret->top = nl;
ret->flags |= BN_FLG_FIXED_TOP;
ret->neg = r->neg;
@@ -154,7 +155,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
ap[i] = 0;
}
- return (1);
+ return 1;
}
#endif /* MONT_WORD */
@@ -188,7 +189,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
BN_CTX_start(ctx);
t1 = BN_CTX_get(ctx);
t2 = BN_CTX_get(ctx);
- if (t1 == NULL || t2 == NULL)
+ if (t2 == NULL)
goto err;
if (!BN_copy(t1, a))
@@ -215,7 +216,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
err:
BN_CTX_end(ctx);
#endif /* MONT_WORD */
- return (retn);
+ return retn;
}
int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
@@ -228,20 +229,22 @@ BN_MONT_CTX *BN_MONT_CTX_new(void)
{
BN_MONT_CTX *ret;
- if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL)
- return (NULL);
+ if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
+ BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE);
+ return NULL;
+ }
BN_MONT_CTX_init(ret);
ret->flags = BN_FLG_MALLOCED;
- return (ret);
+ return ret;
}
void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
{
ctx->ri = 0;
- bn_init(&(ctx->RR));
- bn_init(&(ctx->N));
- bn_init(&(ctx->Ni));
+ bn_init(&ctx->RR);
+ bn_init(&ctx->N);
+ bn_init(&ctx->Ni);
ctx->n0[0] = ctx->n0[1] = 0;
ctx->flags = 0;
}
@@ -250,10 +253,9 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
if (mont == NULL)
return;
-
- BN_clear_free(&(mont->RR));
- BN_clear_free(&(mont->N));
- BN_clear_free(&(mont->Ni));
+ BN_clear_free(&mont->RR);
+ BN_clear_free(&mont->N);
+ BN_clear_free(&mont->Ni);
if (mont->flags & BN_FLG_MALLOCED)
OPENSSL_free(mont);
}
@@ -409,7 +411,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
{
if (to == from)
- return (to);
+ return to;
if (!BN_copy(&(to->RR), &(from->RR)))
return NULL;
@@ -420,7 +422,7 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
to->ri = from->ri;
to->n0[0] = from->n0[0];
to->n0[1] = from->n0[1];
- return (to);
+ return to;
}
BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
diff --git a/deps/openssl/openssl/crypto/bn/bn_mul.c b/deps/openssl/openssl/crypto/bn/bn_mul.c
index 237d7df106..5eda65cfbb 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mul.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mul.c
@@ -154,170 +154,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r,
}
#endif
-BN_ULONG bn_add_part_words(BN_ULONG *r,
- const BN_ULONG *a, const BN_ULONG *b,
- int cl, int dl)
-{
- BN_ULONG c, l, t;
-
- assert(cl >= 0);
- c = bn_add_words(r, a, b, cl);
-
- if (dl == 0)
- return c;
-
- r += cl;
- a += cl;
- b += cl;
-
- if (dl < 0) {
- int save_dl = dl;
- while (c) {
- l = (c + b[0]) & BN_MASK2;
- c = (l < c);
- r[0] = l;
- if (++dl >= 0)
- break;
-
- l = (c + b[1]) & BN_MASK2;
- c = (l < c);
- r[1] = l;
- if (++dl >= 0)
- break;
-
- l = (c + b[2]) & BN_MASK2;
- c = (l < c);
- r[2] = l;
- if (++dl >= 0)
- break;
-
- l = (c + b[3]) & BN_MASK2;
- c = (l < c);
- r[3] = l;
- if (++dl >= 0)
- break;
-
- save_dl = dl;
- b += 4;
- r += 4;
- }
- if (dl < 0) {
- if (save_dl < dl) {
- switch (dl - save_dl) {
- case 1:
- r[1] = b[1];
- if (++dl >= 0)
- break;
- /* fall thru */
- case 2:
- r[2] = b[2];
- if (++dl >= 0)
- break;
- /* fall thru */
- case 3:
- r[3] = b[3];
- if (++dl >= 0)
- break;
- }
- b += 4;
- r += 4;
- }
- }
- if (dl < 0) {
- for (;;) {
- r[0] = b[0];
- if (++dl >= 0)
- break;
- r[1] = b[1];
- if (++dl >= 0)
- break;
- r[2] = b[2];
- if (++dl >= 0)
- break;
- r[3] = b[3];
- if (++dl >= 0)
- break;
-
- b += 4;
- r += 4;
- }
- }
- } else {
- int save_dl = dl;
- while (c) {
- t = (a[0] + c) & BN_MASK2;
- c = (t < c);
- r[0] = t;
- if (--dl <= 0)
- break;
-
- t = (a[1] + c) & BN_MASK2;
- c = (t < c);
- r[1] = t;
- if (--dl <= 0)
- break;
-
- t = (a[2] + c) & BN_MASK2;
- c = (t < c);
- r[2] = t;
- if (--dl <= 0)
- break;
-
- t = (a[3] + c) & BN_MASK2;
- c = (t < c);
- r[3] = t;
- if (--dl <= 0)
- break;
-
- save_dl = dl;
- a += 4;
- r += 4;
- }
- if (dl > 0) {
- if (save_dl > dl) {
- switch (save_dl - dl) {
- case 1:
- r[1] = a[1];
- if (--dl <= 0)
- break;
- /* fall thru */
- case 2:
- r[2] = a[2];
- if (--dl <= 0)
- break;
- /* fall thru */
- case 3:
- r[3] = a[3];
- if (--dl <= 0)
- break;
- }
- a += 4;
- r += 4;
- }
- }
- if (dl > 0) {
- for (;;) {
- r[0] = a[0];
- if (--dl <= 0)
- break;
- r[1] = a[1];
- if (--dl <= 0)
- break;
- r[2] = a[2];
- if (--dl <= 0)
- break;
- r[3] = a[3];
- if (--dl <= 0)
- break;
-
- a += 4;
- r += 4;
- }
- }
- }
- return c;
-}
-
#ifdef BN_RECURSION
/*
* Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
@@ -505,7 +341,6 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
break;
case -3:
- /* break; */
case -2:
bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
@@ -514,14 +349,12 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
case -1:
case 0:
case 1:
- /* break; */
case 2:
bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
neg = 1;
break;
case 3:
- /* break; */
case 4:
bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
@@ -659,176 +492,6 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
}
}
-
-/*-
- * a and b must be the same size, which is n2.
- * r needs to be n2 words and t needs to be n2*2
- * l is the low words of the output.
- * t needs to be n2*3
- */
-void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
- BN_ULONG *t)
-{
- int i, n;
- int c1, c2;
- int neg, oneg, zero;
- BN_ULONG ll, lc, *lp, *mp;
-
- n = n2 / 2;
-
- /* Calculate (al-ah)*(bh-bl) */
- neg = zero = 0;
- c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
- c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
- switch (c1 * 3 + c2) {
- case -4:
- bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
- bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
- break;
- case -3:
- zero = 1;
- break;
- case -2:
- bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
- bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
- neg = 1;
- break;
- case -1:
- case 0:
- case 1:
- zero = 1;
- break;
- case 2:
- bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
- bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
- neg = 1;
- break;
- case 3:
- zero = 1;
- break;
- case 4:
- bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
- bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
- break;
- }
-
- oneg = neg;
- /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
- /* r[10] = (a[1]*b[1]) */
-# ifdef BN_MUL_COMBA
- if (n == 8) {
- bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
- bn_mul_comba8(r, &(a[n]), &(b[n]));
- } else
-# endif
- {
- bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
- bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
- }
-
- /*-
- * s0 == low(al*bl)
- * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
- * We know s0 and s1 so the only unknown is high(al*bl)
- * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
- * high(al*bl) == s1 - (r[0]+l[0]+t[0])
- */
- if (l != NULL) {
- lp = &(t[n2 + n]);
- bn_add_words(lp, &(r[0]), &(l[0]), n);
- } else {
- lp = &(r[0]);
- }
-
- if (neg)
- neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
- else {
- bn_add_words(&(t[n2]), lp, &(t[0]), n);
- neg = 0;
- }
-
- if (l != NULL) {
- bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
- } else {
- lp = &(t[n2 + n]);
- mp = &(t[n2]);
- for (i = 0; i < n; i++)
- lp[i] = ((~mp[i]) + 1) & BN_MASK2;
- }
-
- /*-
- * s[0] = low(al*bl)
- * t[3] = high(al*bl)
- * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
- * r[10] = (a[1]*b[1])
- */
- /*-
- * R[10] = al*bl
- * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
- * R[32] = ah*bh
- */
- /*-
- * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
- * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
- * R[3]=r[1]+(carry/borrow)
- */
- if (l != NULL) {
- lp = &(t[n2]);
- c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
- } else {
- lp = &(t[n2 + n]);
- c1 = 0;
- }
- c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
- if (oneg)
- c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
- else
- c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
-
- c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
- c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
- if (oneg)
- c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
- else
- c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
-
- if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */
- i = 0;
- if (c1 > 0) {
- lc = c1;
- do {
- ll = (r[i] + lc) & BN_MASK2;
- r[i++] = ll;
- lc = (lc > ll);
- } while (lc);
- } else {
- lc = -c1;
- do {
- ll = r[i];
- r[i++] = (ll - lc) & BN_MASK2;
- lc = (lc > ll);
- } while (lc);
- }
- }
- if (c2 != 0) { /* Add starting at r[1] */
- i = n;
- if (c2 > 0) {
- lc = c2;
- do {
- ll = (r[i] + lc) & BN_MASK2;
- r[i++] = ll;
- lc = (lc > ll);
- } while (lc);
- } else {
- lc = -c2;
- do {
- ll = r[i];
- r[i++] = (ll - lc) & BN_MASK2;
- lc = (lc > ll);
- } while (lc);
- }
- }
-}
#endif /* BN_RECURSION */
int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
@@ -863,7 +526,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
if ((al == 0) || (bl == 0)) {
BN_zero(r);
- return (1);
+ return 1;
}
top = al + bl;
@@ -953,7 +616,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
err:
bn_check_top(r);
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
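The bn_mul.c changes delete the unused bn_add_part_words() and bn_mul_high() helpers, drop the commented-out /* break; */ lines from the sign-tracking switch in bn_mul_part_recursive(), and modernise the returns in bn_mul_fixed_top(). The recursive code that remains is Karatsuba multiplication; as a reminder of the identity it exploits, a self-contained sketch on plain machine words (illustration only: the real bn_mul_recursive() works on halves of BN_ULONG arrays and tracks the sign of the middle term with the switch shown above):

    #include <stdint.h>

    /* Karatsuba on 32-bit operands split into 16-bit halves:
     *   a = a1*2^16 + a0,  b = b1*2^16 + b0
     *   a*b = p2*2^32 + (p2 + p0 - (a1 - a0)*(b1 - b0))*2^16 + p0
     * with p0 = a0*b0, p2 = a1*b1: three multiplications instead of four. */
    static uint64_t karatsuba32(uint32_t a, uint32_t b)
    {
        uint32_t a0 = a & 0xffff, a1 = a >> 16;
        uint32_t b0 = b & 0xffff, b1 = b >> 16;

        uint64_t p0 = (uint64_t)a0 * b0;
        uint64_t p2 = (uint64_t)a1 * b1;
        int64_t  d  = (int64_t)((int32_t)a1 - (int32_t)a0)
                    * (int64_t)((int32_t)b1 - (int32_t)b0);
        uint64_t mid = p2 + p0 - (uint64_t)d;   /* == a1*b0 + a0*b1, >= 0 */

        return (p2 << 32) + (mid << 16) + p0;
    }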
diff --git a/deps/openssl/openssl/crypto/bn/bn_nist.c b/deps/openssl/openssl/crypto/bn/bn_nist.c
index 53598f97ef..dcdd321c66 100644
--- a/deps/openssl/openssl/crypto/bn/bn_nist.c
+++ b/deps/openssl/openssl/crypto/bn/bn_nist.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -254,7 +254,7 @@ static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
int i;
#ifdef BN_DEBUG
- OPENSSL_assert(top <= max);
+ (void)ossl_assert(top <= max);
#endif
for (i = 0; i < top; i++)
dst[i] = src[i];
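In bn_nist.c the debug-only check moves from OPENSSL_assert() to the newer ossl_assert() and the copyright year is bumped. nist_cp_bn_0(), only partly visible in the hunk, copies top words and then zero-fills out to max so the NIST reduction code can treat every operand as fixed length; a hypothetical standalone equivalent of that pattern:

    #include <openssl/bn.h>

    /* Copy 'top' words of src and zero-pad dst out to 'max' words. */
    static void copy_and_zero_pad(BN_ULONG *dst, const BN_ULONG *src,
                                  int top, int max)
    {
        int i;

        for (i = 0; i < top; i++)
            dst[i] = src[i];
        for (; i < max; i++)
            dst[i] = 0;
    }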
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.c b/deps/openssl/openssl/crypto/bn/bn_prime.c
index 616389cfa6..b91b31b1f3 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.c
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.c
@@ -1,7 +1,5 @@
/*
- * WARNING: do not edit!
- * Generated by crypto/bn/bn_prime.pl
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -29,53 +27,6 @@ static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
const BIGNUM *add, const BIGNUM *rem,
BN_CTX *ctx);
-static const int prime_offsets[480] = {
- 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
- 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
- 167, 169, 173, 179, 181, 191, 193, 197, 199, 211, 221, 223, 227, 229,
- 233, 239, 241, 247, 251, 257, 263, 269, 271, 277, 281, 283, 289, 293,
- 299, 307, 311, 313, 317, 323, 331, 337, 347, 349, 353, 359, 361, 367,
- 373, 377, 379, 383, 389, 391, 397, 401, 403, 409, 419, 421, 431, 433,
- 437, 439, 443, 449, 457, 461, 463, 467, 479, 481, 487, 491, 493, 499,
- 503, 509, 521, 523, 527, 529, 533, 541, 547, 551, 557, 559, 563, 569,
- 571, 577, 587, 589, 593, 599, 601, 607, 611, 613, 617, 619, 629, 631,
- 641, 643, 647, 653, 659, 661, 667, 673, 677, 683, 689, 691, 697, 701,
- 703, 709, 713, 719, 727, 731, 733, 739, 743, 751, 757, 761, 767, 769,
- 773, 779, 787, 793, 797, 799, 809, 811, 817, 821, 823, 827, 829, 839,
- 841, 851, 853, 857, 859, 863, 871, 877, 881, 883, 887, 893, 899, 901,
- 907, 911, 919, 923, 929, 937, 941, 943, 947, 949, 953, 961, 967, 971,
- 977, 983, 989, 991, 997, 1003, 1007, 1009, 1013, 1019, 1021, 1027, 1031,
- 1033, 1037, 1039, 1049, 1051, 1061, 1063, 1069, 1073, 1079, 1081, 1087,
- 1091, 1093, 1097, 1103, 1109, 1117, 1121, 1123, 1129, 1139, 1147, 1151,
- 1153, 1157, 1159, 1163, 1171, 1181, 1187, 1189, 1193, 1201, 1207, 1213,
- 1217, 1219, 1223, 1229, 1231, 1237, 1241, 1247, 1249, 1259, 1261, 1271,
- 1273, 1277, 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, 1313, 1319,
- 1321, 1327, 1333, 1339, 1343, 1349, 1357, 1361, 1363, 1367, 1369, 1373,
- 1381, 1387, 1391, 1399, 1403, 1409, 1411, 1417, 1423, 1427, 1429, 1433,
- 1439, 1447, 1451, 1453, 1457, 1459, 1469, 1471, 1481, 1483, 1487, 1489,
- 1493, 1499, 1501, 1511, 1513, 1517, 1523, 1531, 1537, 1541, 1543, 1549,
- 1553, 1559, 1567, 1571, 1577, 1579, 1583, 1591, 1597, 1601, 1607, 1609,
- 1613, 1619, 1621, 1627, 1633, 1637, 1643, 1649, 1651, 1657, 1663, 1667,
- 1669, 1679, 1681, 1691, 1693, 1697, 1699, 1703, 1709, 1711, 1717, 1721,
- 1723, 1733, 1739, 1741, 1747, 1751, 1753, 1759, 1763, 1769, 1777, 1781,
- 1783, 1787, 1789, 1801, 1807, 1811, 1817, 1819, 1823, 1829, 1831, 1843,
- 1847, 1849, 1853, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1891, 1901,
- 1907, 1909, 1913, 1919, 1921, 1927, 1931, 1933, 1937, 1943, 1949, 1951,
- 1957, 1961, 1963, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017,
- 2021, 2027, 2029, 2033, 2039, 2041, 2047, 2053, 2059, 2063, 2069, 2071,
- 2077, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2117, 2119, 2129, 2131,
- 2137, 2141, 2143, 2147, 2153, 2159, 2161, 2171, 2173, 2179, 2183, 2197,
- 2201, 2203, 2207, 2209, 2213, 2221, 2227, 2231, 2237, 2239, 2243, 2249,
- 2251, 2257, 2263, 2267, 2269, 2273, 2279, 2281, 2287, 2291, 2293, 2297,
- 2309, 2311
-};
-
-static const int prime_offset_count = 480;
-static const int prime_multiplier = 2310;
-static const int prime_multiplier_bits = 11; /* 2^|prime_multiplier_bits| <=
- * |prime_multiplier| */
-static const int first_prime_index = 5;
-
int BN_GENCB_call(BN_GENCB *cb, int a, int b)
{
/* No callback means continue */
@@ -127,7 +78,7 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
goto err;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
- if (!t)
+ if (t == NULL)
goto err;
loop:
/* make a random number and set the top and bottom bits */
@@ -203,26 +154,28 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
int i, j, ret = -1;
int k;
BN_CTX *ctx = NULL;
- BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
+ BIGNUM *A1, *A1_odd, *A3, *check; /* taken from ctx */
BN_MONT_CTX *mont = NULL;
- if (BN_cmp(a, BN_value_one()) <= 0)
+ /* Take care of the really small primes 2 & 3 */
+ if (BN_is_word(a, 2) || BN_is_word(a, 3))
+ return 1;
+
+ /* Check odd and bigger than 1 */
+ if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0)
return 0;
if (checks == BN_prime_checks)
checks = BN_prime_checks_for_size(BN_num_bits(a));
/* first look for small factors */
- if (!BN_is_odd(a))
- /* a is even => a is prime if and only if a == 2 */
- return BN_is_word(a, 2);
if (do_trial_division) {
for (i = 1; i < NUMPRIMES; i++) {
BN_ULONG mod = BN_mod_word(a, primes[i]);
if (mod == (BN_ULONG)-1)
goto err;
if (mod == 0)
- return 0;
+ return BN_is_word(a, primes[i]);
}
if (!BN_GENCB_call(cb, 1, -1))
goto err;
@@ -235,20 +188,18 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
BN_CTX_start(ctx);
A1 = BN_CTX_get(ctx);
+ A3 = BN_CTX_get(ctx);
A1_odd = BN_CTX_get(ctx);
check = BN_CTX_get(ctx);
if (check == NULL)
goto err;
/* compute A1 := a - 1 */
- if (!BN_copy(A1, a))
+ if (!BN_copy(A1, a) || !BN_sub_word(A1, 1))
goto err;
- if (!BN_sub_word(A1, 1))
+ /* compute A3 := a - 3 */
+ if (!BN_copy(A3, a) || !BN_sub_word(A3, 3))
goto err;
- if (BN_is_zero(A1)) {
- ret = 0;
- goto err;
- }
/* write A1 as A1_odd * 2^k */
k = 1;
@@ -265,11 +216,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
goto err;
for (i = 0; i < checks; i++) {
- if (!BN_pseudo_rand_range(check, A1))
+ /* 1 < check < a-1 */
+ if (!BN_priv_rand_range(check, A3) || !BN_add_word(check, 2))
goto err;
- if (!BN_add_word(check, 1))
- goto err;
- /* now 1 <= check < a */
j = witness(check, a, A1, A1_odd, k, ctx, mont);
if (j == -1)
@@ -290,83 +239,6 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
}
BN_MONT_CTX_free(mont);
- return (ret);
-}
-
-int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx)
-{
- int i;
- int ret = 0;
-
- loop:
- if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
- goto err;
-
- /* we now have a random number 'rand' to test. */
-
- for (i = 1; i < NUMPRIMES; i++) {
- /* check that rnd is a prime */
- BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
- if (mod == (BN_ULONG)-1)
- goto err;
- if (mod <= 1) {
- goto loop;
- }
- }
- ret = 1;
-
- err:
- bn_check_top(rnd);
- return (ret);
-}
-
-int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx)
-{
- int i;
- BIGNUM *offset_index;
- BIGNUM *offset_count;
- int ret = 0;
-
- OPENSSL_assert(bits > prime_multiplier_bits);
-
- BN_CTX_start(ctx);
- if ((offset_index = BN_CTX_get(ctx)) == NULL)
- goto err;
- if ((offset_count = BN_CTX_get(ctx)) == NULL)
- goto err;
-
- if (!BN_add_word(offset_count, prime_offset_count))
- goto err;
-
- loop:
- if (!BN_rand(rnd, bits - prime_multiplier_bits,
- BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
- goto err;
- if (BN_is_bit_set(rnd, bits))
- goto loop;
- if (!BN_rand_range(offset_index, offset_count))
- goto err;
-
- if (!BN_mul_word(rnd, prime_multiplier)
- || !BN_add_word(rnd, prime_offsets[BN_get_word(offset_index)]))
- goto err;
-
- /* we now have a random number 'rand' to test. */
-
- /* skip coprimes */
- for (i = first_prime_index; i < NUMPRIMES; i++) {
- /* check that rnd is a prime */
- BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
- if (mod == (BN_ULONG)-1)
- goto err;
- if (mod <= 1)
- goto loop;
- }
- ret = 1;
-
- err:
- BN_CTX_end(ctx);
- bn_check_top(rnd);
return ret;
}
@@ -405,8 +277,9 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
char is_single_word = bits <= BN_BITS2;
again:
- if (!BN_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
- return (0);
+ /* TODO: Not all primes are private */
+ if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
+ return 0;
/* we now have a random number 'rnd' to test. */
for (i = 1; i < NUMPRIMES; i++) {
BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
@@ -472,11 +345,11 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
}
}
if (!BN_add_word(rnd, delta))
- return (0);
+ return 0;
if (BN_num_bits(rnd) != bits)
goto again;
bn_check_top(rnd);
- return (1);
+ return 1;
}
int bn_probable_prime_dh(BIGNUM *rnd, int bits,
@@ -525,7 +398,7 @@ int bn_probable_prime_dh(BIGNUM *rnd, int bits,
err:
BN_CTX_end(ctx);
bn_check_top(rnd);
- return (ret);
+ return ret;
}
static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
@@ -592,5 +465,5 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
err:
BN_CTX_end(ctx);
bn_check_top(p);
- return (ret);
+ return ret;
}
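The bn_prime.c diff removes the unused bn_probable_prime_dh_retry()/bn_probable_prime_dh_coprime() helpers and their offset tables, answers 2 and 3 up front, lets trial division report a candidate as prime when it actually equals one of the table primes, and changes how Miller-Rabin bases are chosen: instead of 1 <= check < a, each base is now drawn uniformly from [2, a-2] by sampling below A3 = a - 3 with the private DRBG and adding 2. A sketch of just that selection step, using the public BN_priv_rand_range() (error handling abbreviated; the witness() it feeds is internal):

    #include <openssl/bn.h>

    /* Draw a Miller-Rabin base 'check' uniformly from [2, a-2]. */
    static int pick_mr_base(BIGNUM *check, const BIGNUM *a, BN_CTX *ctx)
    {
        int ok = 0;
        BIGNUM *a3;

        BN_CTX_start(ctx);
        a3 = BN_CTX_get(ctx);
        if (a3 == NULL)
            goto end;
        if (!BN_copy(a3, a) || !BN_sub_word(a3, 3))   /* a3 = a - 3 */
            goto end;
        if (!BN_priv_rand_range(check, a3))           /* 0 <= check < a - 3 */
            goto end;
        ok = BN_add_word(check, 2);                   /* 2 <= check <= a - 2 */
     end:
        BN_CTX_end(ctx);
        return ok;
    }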
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.h b/deps/openssl/openssl/crypto/bn/bn_prime.h
index 41440fa4e1..a64c9630f3 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.h
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.h
@@ -2,7 +2,7 @@
* WARNING: do not edit!
* Generated by crypto/bn/bn_prime.pl
*
- * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -14,261 +14,260 @@ typedef unsigned short prime_t;
# define NUMPRIMES 2048
static const prime_t primes[2048] = {
-
- 2, 3, 5, 7, 11, 13, 17, 19,
- 23, 29, 31, 37, 41, 43, 47, 53,
- 59, 61, 67, 71, 73, 79, 83, 89,
- 97, 101, 103, 107, 109, 113, 127, 131,
- 137, 139, 149, 151, 157, 163, 167, 173,
- 179, 181, 191, 193, 197, 199, 211, 223,
- 227, 229, 233, 239, 241, 251, 257, 263,
- 269, 271, 277, 281, 283, 293, 307, 311,
- 313, 317, 331, 337, 347, 349, 353, 359,
- 367, 373, 379, 383, 389, 397, 401, 409,
- 419, 421, 431, 433, 439, 443, 449, 457,
- 461, 463, 467, 479, 487, 491, 499, 503,
- 509, 521, 523, 541, 547, 557, 563, 569,
- 571, 577, 587, 593, 599, 601, 607, 613,
- 617, 619, 631, 641, 643, 647, 653, 659,
- 661, 673, 677, 683, 691, 701, 709, 719,
- 727, 733, 739, 743, 751, 757, 761, 769,
- 773, 787, 797, 809, 811, 821, 823, 827,
- 829, 839, 853, 857, 859, 863, 877, 881,
- 883, 887, 907, 911, 919, 929, 937, 941,
- 947, 953, 967, 971, 977, 983, 991, 997,
- 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
- 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097,
- 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
- 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
- 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
- 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321,
- 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
- 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459,
- 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
- 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571,
- 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
- 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693,
- 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
- 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
- 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
- 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
- 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
- 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069,
- 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
- 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203,
- 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
- 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311,
- 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
- 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
- 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
- 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579,
- 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
- 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693,
- 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
- 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801,
- 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
- 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939,
- 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
- 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
- 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
- 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221,
- 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
- 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347,
- 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
- 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491,
- 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
- 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607,
- 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
- 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
- 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
- 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863,
- 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
- 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003,
- 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
- 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129,
- 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
- 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259,
- 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
- 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
- 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
- 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547,
- 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
- 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673,
- 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
- 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813,
- 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
- 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967,
- 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
- 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
- 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
- 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233,
- 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
- 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399,
- 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
- 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507,
- 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
- 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653,
- 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
- 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
- 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
- 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897,
- 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
- 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073,
- 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
- 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211,
- 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
- 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329,
- 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
- 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
- 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
- 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637,
- 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
- 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779,
- 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
- 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907,
- 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
- 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027,
- 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
- 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
- 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
- 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349,
- 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
- 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517,
- 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
- 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621,
- 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
- 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757,
- 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
- 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
- 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
- 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087,
- 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
- 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231,
- 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
- 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369,
- 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
- 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537,
- 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
- 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677,
- 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
- 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803,
- 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
- 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941,
- 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
- 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091,
- 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
- 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227,
- 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
- 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377,
- 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
- 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491,
- 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
- 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649,
- 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
- 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791,
- 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
- 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929,
- 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
- 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
- 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
- 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
- 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
- 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
- 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
- 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
- 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
- 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
- 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
- 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
- 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
- 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
- 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
- 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
- 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
- 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
- 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
- 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
- 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
- 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
- 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
- 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
- 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
- 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
- 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
- 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
- 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
- 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
- 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
- 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
- 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
- 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
- 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
- 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
- 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
- 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
- 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
- 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
- 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
- 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
- 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
- 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
- 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
- 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
- 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
- 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
- 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
- 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
- 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
- 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
- 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
- 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
- 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
- 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
- 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
- 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
- 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
- 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
- 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
- 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
- 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
- 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
- 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
- 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
- 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
- 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
- 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
- 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
- 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
- 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
- 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
- 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
- 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
- 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
- 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
- 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
- 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
- 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
- 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
- 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
- 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
- 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
- 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
- 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
- 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
- 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
- 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
- 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
- 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
- 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
- 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
- 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
- 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
- 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
- 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
- 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
- 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
- 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
- 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
- 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
- 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
+ 2, 3, 5, 7, 11, 13, 17, 19,
+ 23, 29, 31, 37, 41, 43, 47, 53,
+ 59, 61, 67, 71, 73, 79, 83, 89,
+ 97, 101, 103, 107, 109, 113, 127, 131,
+ 137, 139, 149, 151, 157, 163, 167, 173,
+ 179, 181, 191, 193, 197, 199, 211, 223,
+ 227, 229, 233, 239, 241, 251, 257, 263,
+ 269, 271, 277, 281, 283, 293, 307, 311,
+ 313, 317, 331, 337, 347, 349, 353, 359,
+ 367, 373, 379, 383, 389, 397, 401, 409,
+ 419, 421, 431, 433, 439, 443, 449, 457,
+ 461, 463, 467, 479, 487, 491, 499, 503,
+ 509, 521, 523, 541, 547, 557, 563, 569,
+ 571, 577, 587, 593, 599, 601, 607, 613,
+ 617, 619, 631, 641, 643, 647, 653, 659,
+ 661, 673, 677, 683, 691, 701, 709, 719,
+ 727, 733, 739, 743, 751, 757, 761, 769,
+ 773, 787, 797, 809, 811, 821, 823, 827,
+ 829, 839, 853, 857, 859, 863, 877, 881,
+ 883, 887, 907, 911, 919, 929, 937, 941,
+ 947, 953, 967, 971, 977, 983, 991, 997,
+ 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
+ 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097,
+ 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
+ 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
+ 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
+ 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321,
+ 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
+ 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459,
+ 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
+ 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571,
+ 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
+ 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693,
+ 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
+ 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
+ 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
+ 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
+ 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
+ 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069,
+ 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
+ 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203,
+ 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
+ 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311,
+ 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
+ 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
+ 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
+ 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579,
+ 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
+ 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693,
+ 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
+ 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801,
+ 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
+ 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939,
+ 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
+ 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
+ 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
+ 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221,
+ 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
+ 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347,
+ 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
+ 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491,
+ 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
+ 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607,
+ 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
+ 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
+ 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
+ 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863,
+ 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
+ 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003,
+ 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
+ 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129,
+ 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
+ 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259,
+ 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
+ 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
+ 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
+ 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547,
+ 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
+ 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673,
+ 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
+ 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813,
+ 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
+ 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967,
+ 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
+ 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
+ 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
+ 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233,
+ 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
+ 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399,
+ 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
+ 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507,
+ 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
+ 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653,
+ 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
+ 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
+ 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
+ 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897,
+ 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
+ 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073,
+ 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
+ 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211,
+ 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
+ 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329,
+ 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
+ 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
+ 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
+ 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637,
+ 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
+ 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779,
+ 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
+ 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907,
+ 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
+ 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027,
+ 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
+ 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
+ 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
+ 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349,
+ 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
+ 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517,
+ 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
+ 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621,
+ 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
+ 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757,
+ 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
+ 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
+ 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
+ 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087,
+ 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
+ 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231,
+ 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
+ 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369,
+ 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
+ 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537,
+ 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
+ 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677,
+ 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
+ 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803,
+ 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
+ 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941,
+ 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
+ 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091,
+ 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
+ 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227,
+ 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
+ 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377,
+ 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
+ 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491,
+ 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
+ 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649,
+ 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
+ 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791,
+ 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
+ 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929,
+ 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
+ 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
+ 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
+ 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
+ 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
+ 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
+ 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
+ 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
+ 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
+ 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
+ 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
+ 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
+ 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
+ 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
+ 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
+ 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
+ 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
+ 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
+ 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
+ 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
+ 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
+ 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
+ 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
+ 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
+ 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
+ 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
+ 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
+ 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
+ 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
+ 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
+ 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
+ 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
+ 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
+ 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
+ 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
+ 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
+ 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
+ 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
+ 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
+ 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
+ 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
+ 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
+ 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
+ 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
+ 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
+ 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
+ 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
+ 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
+ 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
+ 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
+ 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
+ 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
+ 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
+ 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
+ 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
+ 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
+ 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
+ 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
+ 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
+ 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
+ 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
+ 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
+ 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
+ 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
+ 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
+ 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
+ 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
+ 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
+ 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
+ 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
+ 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
+ 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
+ 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
+ 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
+ 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
+ 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
+ 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
+ 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
+ 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
+ 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
+ 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
+ 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
+ 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
+ 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
+ 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
+ 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
+ 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
+ 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
+ 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
+ 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
+ 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
+ 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
+ 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
+ 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
+ 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
+ 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
+ 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
+ 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
+ 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
+ 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
+ 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
+ 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
+ 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
};
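The bn_prime.h change is whitespace and the copyright year only: the regenerated script re-emits the table with different indentation, and the 2048 small primes themselves are unchanged. They back the trial-division pass in bn_prime.c; a sketch mirroring that loop, relying on the primes[] table and NUMPRIMES from this header (the -1 follows BN_mod_word()'s error convention):

    #include <openssl/bn.h>

    /* 0 = composite, 1 = no small factor found, -1 = error. */
    static int trial_division(const BIGNUM *a)
    {
        int i;

        for (i = 1; i < NUMPRIMES; i++) {
            BN_ULONG mod = BN_mod_word(a, primes[i]);

            if (mod == (BN_ULONG)-1)    /* BN_mod_word() failed */
                return -1;
            if (mod == 0)               /* divisible: prime only if a == primes[i] */
                return BN_is_word(a, primes[i]);
        }
        return 1;
    }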
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.pl b/deps/openssl/openssl/crypto/bn/bn_prime.pl
index 163d4a9d30..eeca475b93 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.pl
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.pl
@@ -1,17 +1,19 @@
#! /usr/bin/env perl
-# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
+# Output year depends on the year of the script.
+my $YEAR = [localtime([stat($0)]->[9])]->[5] + 1900;
print <<"EOF";
/*
* WARNING: do not edit!
* Generated by crypto/bn/bn_prime.pl
*
- * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -38,9 +40,9 @@ loop: while ($#primes < $num-1) {
print "typedef unsigned short prime_t;\n";
printf "# define NUMPRIMES %d\n\n", $num;
-printf "static const prime_t primes[%d] = {\n", $num;
+printf "static const prime_t primes[%d] = {", $num;
for (my $i = 0; $i <= $#primes; $i++) {
- printf "\n " if ($i % 8) == 0;
- printf "%4d, ", $primes[$i];
+ printf "\n " if ($i % 8) == 0;
+ printf " %5d,", $primes[$i];
}
print "\n};\n";
diff --git a/deps/openssl/openssl/crypto/bn/bn_print.c b/deps/openssl/openssl/crypto/bn/bn_print.c
index 5ffe2fc9ba..1853269d90 100644
--- a/deps/openssl/openssl/crypto/bn/bn_print.c
+++ b/deps/openssl/openssl/crypto/bn/bn_print.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -8,7 +8,7 @@
*/
#include <stdio.h>
-#include <ctype.h>
+#include "internal/ctype.h"
#include <limits.h>
#include "internal/cryptlib.h"
#include <openssl/buffer.h>
@@ -32,27 +32,27 @@ char *BN_bn2hex(const BIGNUM *a)
}
p = buf;
if (a->neg)
- *(p++) = '-';
+ *p++ = '-';
for (i = a->top - 1; i >= 0; i--) {
for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
/* strip leading zeros */
- v = ((int)(a->d[i] >> (long)j)) & 0xff;
- if (z || (v != 0)) {
- *(p++) = Hex[v >> 4];
- *(p++) = Hex[v & 0x0f];
+ v = (int)((a->d[i] >> j) & 0xff);
+ if (z || v != 0) {
+ *p++ = Hex[v >> 4];
+ *p++ = Hex[v & 0x0f];
z = 1;
}
}
}
*p = '\0';
err:
- return (buf);
+ return buf;
}
/* Must 'OPENSSL_free' the returned data */
char *BN_bn2dec(const BIGNUM *a)
{
- int i = 0, num, ok = 0;
+ int i = 0, num, ok = 0, n, tbytes;
char *buf = NULL;
char *p;
BIGNUM *t = NULL;
@@ -67,22 +67,22 @@ char *BN_bn2dec(const BIGNUM *a)
*/
i = BN_num_bits(a) * 3;
num = (i / 10 + i / 1000 + 1) + 1;
+ tbytes = num + 3; /* negative and terminator and one spare? */
bn_data_num = num / BN_DEC_NUM + 1;
bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG));
- buf = OPENSSL_malloc(num + 3);
- if ((buf == NULL) || (bn_data == NULL)) {
+ buf = OPENSSL_malloc(tbytes);
+ if (buf == NULL || bn_data == NULL) {
BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE);
goto err;
}
if ((t = BN_dup(a)) == NULL)
goto err;
-#define BUF_REMAIN (num+3 - (size_t)(p - buf))
p = buf;
lp = bn_data;
if (BN_is_zero(t)) {
- *(p++) = '0';
- *(p++) = '\0';
+ *p++ = '0';
+ *p++ = '\0';
} else {
if (BN_is_negative(t))
*p++ = '-';
@@ -101,14 +101,16 @@ char *BN_bn2dec(const BIGNUM *a)
* the last one needs truncation. The blocks need to be reversed in
* order.
*/
- BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp);
- while (*p)
- p++;
+ n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp);
+ if (n < 0)
+ goto err;
+ p += n;
while (lp != bn_data) {
lp--;
- BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp);
- while (*p)
- p++;
+ n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp);
+ if (n < 0)
+ goto err;
+ p += n;
}
}
ok = 1;
@@ -128,28 +130,28 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
int neg = 0, h, m, i, j, k, c;
int num;
- if ((a == NULL) || (*a == '\0'))
- return (0);
+ if (a == NULL || *a == '\0')
+ return 0;
if (*a == '-') {
neg = 1;
a++;
}
- for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++)
+ for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++)
continue;
- if (i == 0 || i > INT_MAX/4)
+ if (i == 0 || i > INT_MAX / 4)
goto err;
num = i + neg;
if (bn == NULL)
- return (num);
+ return num;
/* a is the start of the hex digits, and it is 'i' long */
if (*bn == NULL) {
if ((ret = BN_new()) == NULL)
- return (0);
+ return 0;
} else {
ret = *bn;
BN_zero(ret);
@@ -163,7 +165,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
m = 0;
h = 0;
while (j > 0) {
- m = ((BN_BYTES * 2) <= j) ? (BN_BYTES * 2) : j;
+ m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j;
l = 0;
for (;;) {
c = a[j - m];
@@ -177,7 +179,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
break;
}
}
- j -= (BN_BYTES * 2);
+ j -= BN_BYTES * 2;
}
ret->top = h;
bn_correct_top(ret);
@@ -187,11 +189,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
/* Don't set the negative flag if it's zero. */
if (ret->top != 0)
ret->neg = neg;
- return (num);
+ return num;
err:
if (*bn == NULL)
BN_free(ret);
- return (0);
+ return 0;
}
int BN_dec2bn(BIGNUM **bn, const char *a)
@@ -201,22 +203,22 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
int neg = 0, i, j;
int num;
- if ((a == NULL) || (*a == '\0'))
- return (0);
+ if (a == NULL || *a == '\0')
+ return 0;
if (*a == '-') {
neg = 1;
a++;
}
- for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++)
+ for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++)
continue;
- if (i == 0 || i > INT_MAX/4)
+ if (i == 0 || i > INT_MAX / 4)
goto err;
num = i + neg;
if (bn == NULL)
- return (num);
+ return num;
/*
* a is the start of the digits, and it is 'i' long. We chop it into
@@ -224,7 +226,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
*/
if (*bn == NULL) {
if ((ret = BN_new()) == NULL)
- return (0);
+ return 0;
} else {
ret = *bn;
BN_zero(ret);
@@ -234,7 +236,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
if (bn_expand(ret, i * 4) == NULL)
goto err;
- j = BN_DEC_NUM - (i % BN_DEC_NUM);
+ j = BN_DEC_NUM - i % BN_DEC_NUM;
if (j == BN_DEC_NUM)
j = 0;
l = 0;
@@ -257,11 +259,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
/* Don't set the negative flag if it's zero. */
if (ret->top != 0)
ret->neg = neg;
- return (num);
+ return num;
err:
if (*bn == NULL)
BN_free(ret);
- return (0);
+ return 0;
}
int BN_asc2bn(BIGNUM **bn, const char *a)
@@ -291,11 +293,11 @@ int BN_print_fp(FILE *fp, const BIGNUM *a)
int ret;
if ((b = BIO_new(BIO_s_file())) == NULL)
- return (0);
+ return 0;
BIO_set_fp(b, fp, BIO_NOCLOSE);
ret = BN_print(b, a);
BIO_free(b);
- return (ret);
+ return ret;
}
# endif
@@ -304,16 +306,16 @@ int BN_print(BIO *bp, const BIGNUM *a)
int i, j, v, z = 0;
int ret = 0;
- if ((a->neg) && (BIO_write(bp, "-", 1) != 1))
+ if ((a->neg) && BIO_write(bp, "-", 1) != 1)
goto end;
- if (BN_is_zero(a) && (BIO_write(bp, "0", 1) != 1))
+ if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1)
goto end;
for (i = a->top - 1; i >= 0; i--) {
for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
/* strip leading zeros */
- v = ((int)(a->d[i] >> (long)j)) & 0x0f;
- if (z || (v != 0)) {
- if (BIO_write(bp, &(Hex[v]), 1) != 1)
+ v = (int)((a->d[i] >> j) & 0x0f);
+ if (z || v != 0) {
+ if (BIO_write(bp, &Hex[v], 1) != 1)
goto end;
z = 1;
}
@@ -321,7 +323,7 @@ int BN_print(BIO *bp, const BIGNUM *a)
}
ret = 1;
end:
- return (ret);
+ return ret;
}
char *BN_options(void)
@@ -332,12 +334,12 @@ char *BN_options(void)
if (!init) {
init++;
#ifdef BN_LLONG
- BIO_snprintf(data, sizeof(data), "bn(%d,%d)",
- (int)sizeof(BN_ULLONG) * 8, (int)sizeof(BN_ULONG) * 8);
+ BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+ sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8);
#else
- BIO_snprintf(data, sizeof(data), "bn(%d,%d)",
- (int)sizeof(BN_ULONG) * 8, (int)sizeof(BN_ULONG) * 8);
+ BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+ sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8);
#endif
}
- return (data);
+ return data;
}
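bn_print.c swaps <ctype.h> for the locale-independent internal/ctype.h wrappers (ossl_isxdigit, ossl_isdigit), and BN_bn2dec() now advances its write pointer by BIO_snprintf()'s return value, treating a negative return (error or truncation) as failure instead of re-scanning for the terminator, so a short buffer aborts cleanly rather than emitting a truncated number. The same pattern with ISO snprintf looks like this (hedged: ISO snprintf signals truncation by returning the would-be length, so the check differs, and the nine-digit group width is only illustrative; the real code uses BN_DEC_FMT1/BN_DEC_FMT2, whose width depends on the size of BN_ULONG):

    #include <stdio.h>

    /* Append decimal groups into a fixed buffer, failing cleanly on overflow. */
    static int append_decimal_groups(char *buf, size_t buflen,
                                     const unsigned long *groups, size_t n)
    {
        char *p = buf;
        size_t i;

        for (i = 0; i < n; i++) {
            size_t remain = buflen - (size_t)(p - buf);
            int written = snprintf(p, remain,
                                   i == 0 ? "%lu" : "%09lu", groups[i]);

            if (written < 0 || (size_t)written >= remain)
                return 0;               /* error or truncation */
            p += written;
        }
        return 1;
    }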
diff --git a/deps/openssl/openssl/crypto/bn/bn_rand.c b/deps/openssl/openssl/crypto/bn/bn_rand.c
index 9ce4c5f606..c0d1a32292 100644
--- a/deps/openssl/openssl/crypto/bn/bn_rand.c
+++ b/deps/openssl/openssl/crypto/bn/bn_rand.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -14,11 +14,14 @@
#include <openssl/rand.h>
#include <openssl/sha.h>
-static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
+typedef enum bnrand_flag_e {
+ NORMAL, TESTING, PRIVATE
+} BNRAND_FLAG;
+
+static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom)
{
unsigned char *buf = NULL;
- int ret = 0, bit, bytes, mask;
- time_t tim;
+ int b, ret = 0, bit, bytes, mask;
if (bits == 0) {
if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY)
@@ -40,13 +43,11 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
}
/* make a random number and set the top and bottom bits */
- time(&tim);
- RAND_add(&tim, sizeof(tim), 0.0);
-
- if (RAND_bytes(buf, bytes) <= 0)
+ b = flag == NORMAL ? RAND_bytes(buf, bytes) : RAND_priv_bytes(buf, bytes);
+ if (b <= 0)
goto err;
- if (pseudorand == 2) {
+ if (flag == TESTING) {
/*
* generate patterns that are more likely to trigger BN library bugs
*/
@@ -86,7 +87,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
err:
OPENSSL_clear_free(buf, bytes);
bn_check_top(rnd);
- return (ret);
+ return ret;
toosmall:
BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
@@ -95,29 +96,27 @@ toosmall:
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
- return bnrand(0, rnd, bits, top, bottom);
+ return bnrand(NORMAL, rnd, bits, top, bottom);
}
-int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
+int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
- return bnrand(1, rnd, bits, top, bottom);
+ return bnrand(TESTING, rnd, bits, top, bottom);
}
-int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
+int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
- return bnrand(2, rnd, bits, top, bottom);
+ return bnrand(PRIVATE, rnd, bits, top, bottom);
}
/* random number r: 0 <= r < range */
-static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
+static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range)
{
- int (*bn_rand) (BIGNUM *, int, int, int) =
- pseudo ? BN_pseudo_rand : BN_rand;
int n;
int count = 100;
if (range->neg || BN_is_zero(range)) {
- BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
+ BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE);
return 0;
}
@@ -133,8 +132,9 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
* than range
*/
do {
- if (!bn_rand(r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+ if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
return 0;
+
/*
* If r < 3*range, use r := r MOD range (which is either r, r -
* range, or r - 2*range). Otherwise, iterate once more. Since
@@ -150,7 +150,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
}
if (!--count) {
- BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+ BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
@@ -159,11 +159,11 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
} else {
do {
/* range = 11..._2 or range = 101..._2 */
- if (!bn_rand(r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+ if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
return 0;
if (!--count) {
- BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+ BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
}
@@ -176,12 +176,22 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
int BN_rand_range(BIGNUM *r, const BIGNUM *range)
{
- return bn_rand_range(0, r, range);
+ return bnrand_range(NORMAL, r, range);
+}
+
+int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range)
+{
+ return bnrand_range(PRIVATE, r, range);
+}
+
+int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+ return BN_rand(rnd, bits, top, bottom);
}
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
{
- return bn_rand_range(1, r, range);
+ return BN_rand_range(r, range);
}
/*
@@ -229,7 +239,7 @@ int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
for (done = 0; done < num_k_bytes;) {
- if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1)
+ if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1)
goto err;
SHA512_Init(&sha);
SHA512_Update(&sha, &done, sizeof(done));
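bn_rand.c replaces the old 0/1/2 pseudorand selector with the BNRAND_FLAG enum, drops the RAND_add(&tim, ...) time mixing, and routes PRIVATE requests to RAND_priv_bytes(), the DRBG instance reserved for secret material; BN_pseudo_rand() and BN_pseudo_rand_range() become plain aliases of BN_rand()/BN_rand_range(). Code that generates secrets is expected to use the new _priv_ entry points, e.g. (sketch, error handling abbreviated):

    #include <openssl/bn.h>

    /* Two typical uses of the private-DRBG wrappers introduced above. */
    int sample_secrets(BIGNUM *k, BIGNUM *candidate,
                       const BIGNUM *order, int bits)
    {
        /* uniform in [0, order), e.g. a DH/ECDSA secret scalar */
        if (!BN_priv_rand_range(k, order))
            return 0;
        /* 'bits'-bit odd value with the top bit set, e.g. a prime candidate */
        if (!BN_priv_rand(candidate, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
            return 0;
        return 1;
    }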
diff --git a/deps/openssl/openssl/crypto/bn/bn_recp.c b/deps/openssl/openssl/crypto/bn/bn_recp.c
index 20585b9d4b..9ab767f42f 100644
--- a/deps/openssl/openssl/crypto/bn/bn_recp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_recp.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -21,22 +21,23 @@ BN_RECP_CTX *BN_RECP_CTX_new(void)
{
BN_RECP_CTX *ret;
- if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL)
- return (NULL);
+ if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+ BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE);
+ return NULL;
+ }
bn_init(&(ret->N));
bn_init(&(ret->Nr));
ret->flags = BN_FLG_MALLOCED;
- return (ret);
+ return ret;
}
void BN_RECP_CTX_free(BN_RECP_CTX *recp)
{
if (recp == NULL)
return;
-
- BN_free(&(recp->N));
- BN_free(&(recp->Nr));
+ BN_free(&recp->N);
+ BN_free(&recp->Nr);
if (recp->flags & BN_FLG_MALLOCED)
OPENSSL_free(recp);
}
@@ -48,7 +49,7 @@ int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
BN_zero(&(recp->Nr));
recp->num_bits = BN_num_bits(d);
recp->shift = 0;
- return (1);
+ return 1;
}
int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
@@ -77,7 +78,7 @@ int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
err:
BN_CTX_end(ctx);
bn_check_top(r);
- return (ret);
+ return ret;
}
int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
@@ -87,17 +88,11 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
BIGNUM *a, *b, *d, *r;
BN_CTX_start(ctx);
+ d = (dv != NULL) ? dv : BN_CTX_get(ctx);
+ r = (rem != NULL) ? rem : BN_CTX_get(ctx);
a = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
- if (dv != NULL)
- d = dv;
- else
- d = BN_CTX_get(ctx);
- if (rem != NULL)
- r = rem;
- else
- r = BN_CTX_get(ctx);
- if (a == NULL || b == NULL || d == NULL || r == NULL)
+ if (b == NULL)
goto err;
if (BN_ucmp(m, &(recp->N)) < 0) {
@@ -107,7 +102,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
return 0;
}
BN_CTX_end(ctx);
- return (1);
+ return 1;
}
/*
@@ -167,7 +162,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
BN_CTX_end(ctx);
bn_check_top(dv);
bn_check_top(rem);
- return (ret);
+ return ret;
}
/*
@@ -195,5 +190,5 @@ int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
err:
bn_check_top(r);
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
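
The BN_div_recp() prologue rewritten above leans on a documented BN_CTX property: once BN_CTX_get() fails inside a started frame, every later BN_CTX_get() in that frame also returns NULL, so checking only the result of the final call (b here) also covers a and whichever of d and r were fetched from the context. A small usage sketch of that idiom (demo() is a made-up caller, not part of this patch):

    #include <openssl/bn.h>

    static int demo(BN_CTX *ctx)
    {
        int ok = 0;
        BIGNUM *a, *b, *c;

        BN_CTX_start(ctx);
        a = BN_CTX_get(ctx);
        b = BN_CTX_get(ctx);
        c = BN_CTX_get(ctx);        /* last allocation in this frame */
        if (c == NULL)              /* a single check covers a and b too */
            goto err;

        ok = 1;                     /* ... real work would go here ... */
     err:
        BN_CTX_end(ctx);
        return ok;
    }
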
diff --git a/deps/openssl/openssl/crypto/bn/bn_shift.c b/deps/openssl/openssl/crypto/bn/bn_shift.c
index 6a1eec80af..15d4b321ba 100644
--- a/deps/openssl/openssl/crypto/bn/bn_shift.c
+++ b/deps/openssl/openssl/crypto/bn/bn_shift.c
@@ -21,11 +21,11 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
if (r != a) {
r->neg = a->neg;
if (bn_wexpand(r, a->top + 1) == NULL)
- return (0);
+ return 0;
r->top = a->top;
} else {
if (bn_wexpand(r, a->top + 1) == NULL)
- return (0);
+ return 0;
}
ap = a->d;
rp = r->d;
@@ -40,7 +40,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
r->top++;
}
bn_check_top(r);
- return (1);
+ return 1;
}
int BN_rshift1(BIGNUM *r, const BIGNUM *a)
@@ -53,14 +53,14 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
if (BN_is_zero(a)) {
BN_zero(r);
- return (1);
+ return 1;
}
i = a->top;
ap = a->d;
j = i - (ap[i - 1] == 1);
if (a != r) {
if (bn_wexpand(r, j) == NULL)
- return (0);
+ return 0;
r->neg = a->neg;
}
rp = r->d;
@@ -77,7 +77,7 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
if (!r->top)
r->neg = 0; /* don't allow negative zero */
bn_check_top(r);
- return (1);
+ return 1;
}
int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
@@ -96,7 +96,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
nw = n / BN_BITS2;
if (bn_wexpand(r, a->top + nw + 1) == NULL)
- return (0);
+ return 0;
r->neg = a->neg;
lb = n % BN_BITS2;
rb = BN_BITS2 - lb;
@@ -116,7 +116,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
r->top = a->top + nw + 1;
bn_correct_top(r);
bn_check_top(r);
- return (1);
+ return 1;
}
int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
@@ -138,12 +138,12 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
lb = BN_BITS2 - rb;
if (nw >= a->top || a->top == 0) {
BN_zero(r);
- return (1);
+ return 1;
}
i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
if (r != a) {
if (bn_wexpand(r, i) == NULL)
- return (0);
+ return 0;
r->neg = a->neg;
} else {
if (n == 0)
@@ -171,5 +171,5 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
if (!r->top)
r->neg = 0; /* don't allow negative zero */
bn_check_top(r);
- return (1);
+ return 1;
}
diff --git a/deps/openssl/openssl/crypto/bn/bn_sqr.c b/deps/openssl/openssl/crypto/bn/bn_sqr.c
index db72bf28a6..0c0a590f0c 100644
--- a/deps/openssl/openssl/crypto/bn/bn_sqr.c
+++ b/deps/openssl/openssl/crypto/bn/bn_sqr.c
@@ -42,7 +42,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
BN_CTX_start(ctx);
rr = (a != r) ? r : BN_CTX_get(ctx);
tmp = BN_CTX_get(ctx);
- if (!rr || !tmp)
+ if (rr == NULL || tmp == NULL)
goto err;
max = 2 * al; /* Non-zero (from above) */
@@ -102,7 +102,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
bn_check_top(rr);
bn_check_top(tmp);
BN_CTX_end(ctx);
- return (ret);
+ return ret;
}
/* tmp must have 2*n words */
diff --git a/deps/openssl/openssl/crypto/bn/bn_sqrt.c b/deps/openssl/openssl/crypto/bn/bn_sqrt.c
index 84376c78e5..b97d8ca43b 100644
--- a/deps/openssl/openssl/crypto/bn/bn_sqrt.c
+++ b/deps/openssl/openssl/crypto/bn/bn_sqrt.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -39,7 +39,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
}
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
- return (NULL);
+ return NULL;
}
if (BN_is_zero(a) || BN_is_one(a)) {
@@ -179,7 +179,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
if (!BN_set_word(y, i))
goto end;
} else {
- if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0))
+ if (!BN_priv_rand(y, BN_num_bits(p), 0, 0))
goto end;
if (BN_ucmp(y, p) >= 0) {
if (!(p->neg ? BN_add : BN_sub) (y, y, p))
diff --git a/deps/openssl/openssl/crypto/bn/bn_srp.c b/deps/openssl/openssl/crypto/bn/bn_srp.c
index 58b1691eee..27b6ebe518 100644
--- a/deps/openssl/openssl/crypto/bn/bn_srp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_srp.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -8,12 +8,12 @@
*/
#include "bn_lcl.h"
-#include "e_os.h"
+#include "internal/nelem.h"
#ifndef OPENSSL_NO_SRP
#include <openssl/srp.h>
-#include <internal/bn_srp.h>
+#include "internal/bn_srp.h"
# if (BN_BYTES == 8)
# if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
diff --git a/deps/openssl/openssl/crypto/bn/bn_word.c b/deps/openssl/openssl/crypto/bn/bn_word.c
index 1af13a53fb..262d7668fc 100644
--- a/deps/openssl/openssl/crypto/bn/bn_word.c
+++ b/deps/openssl/openssl/crypto/bn/bn_word.c
@@ -55,7 +55,7 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
(BN_ULLONG) w);
#endif
}
- return ((BN_ULONG)ret);
+ return (BN_ULONG)ret;
}
BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
@@ -92,7 +92,7 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
if (!a->top)
a->neg = 0; /* don't allow negative zero */
bn_check_top(a);
- return (ret);
+ return ret;
}
int BN_add_word(BIGNUM *a, BN_ULONG w)
@@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w)
i = BN_sub_word(a, w);
if (!BN_is_zero(a))
a->neg = !(a->neg);
- return (i);
+ return i;
}
for (i = 0; w != 0 && i < a->top; i++) {
a->d[i] = l = (a->d[i] + w) & BN_MASK2;
@@ -128,7 +128,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w)
a->d[i] = w;
}
bn_check_top(a);
- return (1);
+ return 1;
}
int BN_sub_word(BIGNUM *a, BN_ULONG w)
@@ -153,13 +153,13 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w)
a->neg = 0;
i = BN_add_word(a, w);
a->neg = 1;
- return (i);
+ return i;
}
if ((a->top == 1) && (a->d[0] < w)) {
a->d[0] = w - a->d[0];
a->neg = 1;
- return (1);
+ return 1;
}
i = 0;
for (;;) {
@@ -175,7 +175,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w)
if ((a->d[i] == 0) && (i == (a->top - 1)))
a->top--;
bn_check_top(a);
- return (1);
+ return 1;
}
int BN_mul_word(BIGNUM *a, BN_ULONG w)
@@ -191,11 +191,11 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w)
ll = bn_mul_words(a->d, a->d, a->top, w);
if (ll) {
if (bn_wexpand(a, a->top + 1) == NULL)
- return (0);
+ return 0;
a->d[a->top++] = ll;
}
}
}
bn_check_top(a);
- return (1);
+ return 1;
}
diff --git a/deps/openssl/openssl/crypto/bn/bn_x931p.c b/deps/openssl/openssl/crypto/bn/bn_x931p.c
index d01f12cadc..9eb8384fde 100644
--- a/deps/openssl/openssl/crypto/bn/bn_x931p.c
+++ b/deps/openssl/openssl/crypto/bn/bn_x931p.c
@@ -62,10 +62,10 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
return 0;
BN_CTX_start(ctx);
- if (!p1)
+ if (p1 == NULL)
p1 = BN_CTX_get(ctx);
- if (!p2)
+ if (p2 == NULL)
p2 = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
@@ -173,7 +173,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
* - 1. By setting the top two bits we ensure that the lower bound is
* exceeded.
*/
- if (!BN_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+ if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
goto err;
BN_CTX_start(ctx);
@@ -182,7 +182,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
goto err;
for (i = 0; i < 1000; i++) {
- if (!BN_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+ if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
goto err;
/* Check that |Xp - Xq| > 2^(nbits - 100) */
@@ -227,9 +227,9 @@ int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
if (Xp1 == NULL || Xp2 == NULL)
goto error;
- if (!BN_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+ if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
goto error;
- if (!BN_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+ if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
goto error;
if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
goto error;
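
The BN_rand() to BN_priv_rand() swaps above move the X9.31 candidate generation onto the private DRBG; the arguments, including the BN_RAND_TOP_TWO flag whose lower-bound role the comment above explains, are unchanged. A hedged usage sketch, with pick_secret_candidate() as a made-up wrapper rather than anything from this patch:

    #include <openssl/bn.h>

    /* Secret material: draw from the private DRBG with both top bits set
     * so the candidate has the full requested bit length. */
    static int pick_secret_candidate(BIGNUM *x, int nbits)
    {
        return BN_priv_rand(x, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY);
    }
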
diff --git a/deps/openssl/openssl/crypto/bn/build.info b/deps/openssl/openssl/crypto/bn/build.info
index c608ecce82..a463eddabb 100644
--- a/deps/openssl/openssl/crypto/bn/build.info
+++ b/deps/openssl/openssl/crypto/bn/build.info
@@ -11,16 +11,16 @@ INCLUDE[../../libcrypto]=../../crypto/include
INCLUDE[bn_exp.o]=..
GENERATE[bn-586.s]=asm/bn-586.pl \
- $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+ $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
DEPEND[bn-586.s]=../perlasm/x86asm.pl
GENERATE[co-586.s]=asm/co-586.pl \
- $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+ $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
DEPEND[co-586.s]=../perlasm/x86asm.pl
GENERATE[x86-mont.s]=asm/x86-mont.pl \
- $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+ $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
DEPEND[x86-mont.s]=../perlasm/x86asm.pl
GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \
- $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+ $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl
GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME)
@@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=..
GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[sparcv9-gf2m.o]=..
-GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME)
-GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME)
+GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
+INCLUDE[bn-mips.o]=..
+GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
+INCLUDE[mips-mont.o]=..
GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
@@ -47,7 +49,7 @@ GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME)
GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME)
GENERATE[bn-ia64.s]=asm/ia64.S
-GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(CFLAGS) $(LIB_CFLAGS)
+GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME)
@@ -63,22 +65,3 @@ INCLUDE[armv4-mont.o]=..
GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
INCLUDE[armv4-gf2m.o]=..
GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
-
-OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c
-BEGINRAW[Makefile]
-##### BN assembler implementations
-
-{- $builddir -}/bn-mips3.o: {- $sourcedir -}/asm/mips3.s
- @if [ "$(CC)" = "gcc" ]; then \
- ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \
- as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \
- else $(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi
-
-# GNU assembler fails to compile PA-RISC2 modules, insist on calling
-# vendor assembler...
-{- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s
- CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2W.o {- $sourcedir -}/asm/pa-risc2W.s
-{- $builddir -}/pa-risc2.o: {- $sourcedir -}/asm/pa-risc2.s
- CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2.o {- $sourcedir -}/asm/pa-risc2.s
-
-ENDRAW[Makefile]
diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.c b/deps/openssl/openssl/crypto/bn/rsaz_exp.c
index 1a70f6cade..22455b8a63 100644
--- a/deps/openssl/openssl/crypto/bn/rsaz_exp.c
+++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.c
@@ -1,54 +1,17 @@
/*
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
*/
-/*****************************************************************************
-* *
-* Copyright (c) 2012, Intel Corporation *
-* *
-* All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without *
-* modification, are permitted provided that the following conditions are *
-* met: *
-* *
-* * Redistributions of source code must retain the above copyright *
-* notice, this list of conditions and the following disclaimer. *
-* *
-* * Redistributions in binary form must reproduce the above copyright *
-* notice, this list of conditions and the following disclaimer in the *
-* documentation and/or other materials provided with the *
-* distribution. *
-* *
-* * Neither the name of the Intel Corporation nor the names of its *
-* contributors may be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* *
-* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY *
-* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
-* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR *
-* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
-* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
-* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
-* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
-* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
-* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-******************************************************************************
-* Developers and authors: *
-* Shay Gueron (1, 2), and Vlad Krasnov (1) *
-* (1) Intel Corporation, Israel Development Center, Haifa, Israel *
-* (2) University of Haifa, Israel *
-*****************************************************************************/
-
#include <openssl/opensslconf.h>
#include "rsaz_exp.h"
diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.h b/deps/openssl/openssl/crypto/bn/rsaz_exp.h
index 9501cc8089..c5864f8aaa 100644
--- a/deps/openssl/openssl/crypto/bn/rsaz_exp.h
+++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.h
@@ -1,54 +1,17 @@
/*
- * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
*/
-/*****************************************************************************
-* *
-* Copyright (c) 2012, Intel Corporation *
-* *
-* All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without *
-* modification, are permitted provided that the following conditions are *
-* met: *
-* *
-* * Redistributions of source code must retain the above copyright *
-* notice, this list of conditions and the following disclaimer. *
-* *
-* * Redistributions in binary form must reproduce the above copyright *
-* notice, this list of conditions and the following disclaimer in the *
-* documentation and/or other materials provided with the *
-* distribution. *
-* *
-* * Neither the name of the Intel Corporation nor the names of its *
-* contributors may be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* *
-* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY *
-* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
-* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
-* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR *
-* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
-* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
-* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
-* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
-* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
-* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-******************************************************************************
-* Developers and authors: *
-* Shay Gueron (1, 2), and Vlad Krasnov (1) *
-* (1) Intel Corporation, Israel Development Center, Haifa, Israel *
-* (2) University of Haifa, Israel *
-*****************************************************************************/
-
#ifndef RSAZ_EXP_H
# define RSAZ_EXP_H
@@ -65,7 +28,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0);
-int rsaz_avx2_eligible();
+int rsaz_avx2_eligible(void);
void RSAZ_512_mod_exp(BN_ULONG result[8],
const BN_ULONG base_norm[8], const BN_ULONG exponent[8],