diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/bn')
67 files changed, 3970 insertions, 5742 deletions
diff --git a/deps/openssl/openssl/crypto/bn/README.pod b/deps/openssl/openssl/crypto/bn/README.pod index 109ab0d914..706a140342 100644 --- a/deps/openssl/openssl/crypto/bn/README.pod +++ b/deps/openssl/openssl/crypto/bn/README.pod @@ -6,7 +6,7 @@ bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words, bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8, bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal, bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive, -bn_mul_low_recursive, bn_mul_high, bn_sqr_normal, bn_sqr_recursive, +bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive, bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top, bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM library internal functions @@ -41,8 +41,6 @@ library internal functions int n, int tna, int tnb, BN_ULONG *tmp); void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *tmp); - void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, - int n2, BN_ULONG *tmp); void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp); void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp); @@ -178,10 +176,6 @@ bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a> and B<b>. -bn_mul_high(B<r>, B<a>, B<b>, B<l>, B<n2>, B<tmp>) operates on the -B<n2> word arrays B<r>, B<a>, B<b> and B<l> (?) and the 3*B<n2> word -array B<tmp>. - BN_mul() calls bn_mul_normal(), or an optimized implementation if the factors have the same size: bn_mul_comba8() is used if they are 8 words long, bn_mul_recursive() if they are larger than diff --git a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl index 9632133090..c9b962a150 100644 --- a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl @@ -8,7 +8,7 @@ # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl index 0bb5433075..7a0cdb2e8a 100644 --- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -36,7 +36,7 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf $flavour = shift; diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl index ddee8b7fa1..6bedc62ba6 100644 --- a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl @@ -23,7 +23,7 @@ # [depending on key length, less for longer keys] on ARM920T, and # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code # base and compiler generated code with in-lined umull and even umlal -# instructions. The latter means that this code didn't really have an +# instructions. The latter means that this code didn't really have an # "advantage" of utilizing some "secret" instruction. # # The code is interoperable with Thumb ISA and is rather compact, less diff --git a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl index 1ca1bbf7d4..58effc8808 100644 --- a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl +++ b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl @@ -14,7 +14,7 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; -&asm_init($ARGV[0],$0); +&asm_init($ARGV[0]); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -54,7 +54,7 @@ sub bn_mul_add_words &movd("mm0",&wparam(3)); # mm0 = w &pxor("mm1","mm1"); # mm1 = carry_in &jmp(&label("maw_sse2_entry")); - + &set_label("maw_sse2_unrolled",16); &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] &paddq("mm1","mm3"); # mm1 = carry_in + r[0] @@ -675,20 +675,20 @@ sub bn_sub_part_words &adc($c,0); &mov(&DWP($i*4,$r,"",0),$tmp1); # *r } - + &comment(""); &add($b,32); &add($r,32); &sub($num,8); &jnz(&label("pw_neg_loop")); - + &set_label("pw_neg_finish",0); &mov($tmp2,&wparam(4)); # get dl &mov($num,0); &sub($num,$tmp2); &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl<0 Tail Round $i"); @@ -705,9 +705,9 @@ sub bn_sub_part_words } &jmp(&label("pw_end")); - + &set_label("pw_pos",0); - + &and($num,0xfffffff8); # num / 8 &jz(&label("pw_pos_finish")); @@ -722,18 +722,18 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &jnc(&label("pw_nc".$i)); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_pos_loop")); - + &set_label("pw_pos_finish",0); &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_end")); - + for ($i=0; $i<7; $i++) { &comment("dl>0 Tail Round $i"); @@ -754,17 +754,17 @@ sub bn_sub_part_words &mov(&DWP($i*4,$r,"",0),$tmp1); # *r &set_label("pw_nc".$i,0); } - + &comment(""); &add($a,32); &add($r,32); &sub($num,8); &jnz(&label("pw_nc_loop")); - + &mov($num,&wparam(4)); # get dl &and($num,7); &jz(&label("pw_nc_end")); - + for ($i=0; $i<7; $i++) { &mov($tmp1,&DWP($i*4,$a,"",0)); # *a diff --git a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl index c0e5400807..9c46da3af8 100644 --- a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl @@ -43,7 +43,7 @@ $code.=<<___; SHRU $A,16, $Ahi ; smash $A to two halfwords || EXTU $A,16,16,$Alo - XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits muliplication + XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication || XORMPY $Ahi,$B_2,$Ahix2 || EXTU $B,16,24,$B_1 XORMPY $Alo,$B_0,$Alox0 diff --git a/deps/openssl/openssl/crypto/bn/asm/co-586.pl b/deps/openssl/openssl/crypto/bn/asm/co-586.pl index 60d0363660..97f5e3a19f 100644 --- a/deps/openssl/openssl/crypto/bn/asm/co-586.pl +++ b/deps/openssl/openssl/crypto/bn/asm/co-586.pl @@ -13,7 +13,7 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; -&asm_init($ARGV[0],$0); +&asm_init($ARGV[0]); &bn_mul_comba("bn_mul_comba8",8); &bn_mul_comba("bn_mul_comba4",4); @@ -47,7 +47,7 @@ sub mul_add_c &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a } @@ -76,7 +76,7 @@ sub sqr_add_c &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb); ### &adc($c2,0); - # is pos > 1, it means it is the last loop + # is pos > 1, it means it is the last loop &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[]; &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b } @@ -127,7 +127,7 @@ sub bn_mul_comba $c2="ebp"; $a="esi"; $b="edi"; - + $as=0; $ae=0; $bs=0; @@ -142,9 +142,9 @@ sub bn_mul_comba &push("ebx"); &xor($c0,$c0); - &mov("eax",&DWP(0,$a,"",0)); # load the first word + &mov("eax",&DWP(0,$a,"",0)); # load the first word &xor($c1,$c1); - &mov("edx",&DWP(0,$b,"",0)); # load the first second + &mov("edx",&DWP(0,$b,"",0)); # load the first second for ($i=0; $i<$tot; $i++) { @@ -152,7 +152,7 @@ sub bn_mul_comba $bi=$bs; $end=$be+1; - &comment("################## Calculate word $i"); + &comment("################## Calculate word $i"); for ($j=$bs; $j<$end; $j++) { diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl index 0df1fad115..ec486f7779 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl @@ -8,7 +8,7 @@ # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -80,7 +80,7 @@ $code=<<___; // int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap, // const BN_ULONG *bp,const BN_ULONG *np, -// const BN_ULONG *n0p,int num); +// const BN_ULONG *n0p,int num); .align 64 .global bn_mul_mont# .proc bn_mul_mont# @@ -203,7 +203,7 @@ bn_mul_mont_general: { .mmi; .pred.rel "mutex",p39,p41 (p39) add topbit=r0,r0 (p41) add topbit=r0,r0,1 - nop.i 0 } + nop.i 0 } { .mmi; st8 [tp_1]=n[0] add tptr=16,sp add tp_1=8,sp };; diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64.S b/deps/openssl/openssl/crypto/bn/asm/ia64.S index f2404a3c1e..d235c45e2d 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ia64.S +++ b/deps/openssl/openssl/crypto/bn/asm/ia64.S @@ -1,9 +1,9 @@ .explicit .text .ident "ia64.S, Version 2.1" -.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>" -// Copyright 2001-2016 The OpenSSL Project Authors. All Rights Reserved. +// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use // this file except in compliance with the License. You can obtain a copy @@ -12,7 +12,7 @@ // // ==================================================================== -// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL // project. // // Rights for redistribution and usage in source and binary forms are @@ -20,7 +20,7 @@ // disclaimed. // ==================================================================== // -// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is +// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is // different from Itanium to this module viewpoint. Most notably, is it // "wider" than Itanium? Can you experience loop scalability as // discussed in commentary sections? Not really:-( Itanium2 has 6 @@ -141,7 +141,7 @@ // User Mask I want to excuse the kernel from preserving upper // (f32-f128) FP register bank over process context switch, thus // minimizing bus bandwidth consumption during the switch (i.e. -// after PKI opration completes and the program is off doing +// after PKI operation completes and the program is off doing // something else like bulk symmetric encryption). Having said // this, I also want to point out that it might be good idea // to compile the whole toolkit (as well as majority of the @@ -157,12 +157,15 @@ #else #define ADDP add #endif +#ifdef __VMS +.alias abort, "decc$abort" +#endif #if 1 // // bn_[add|sub]_words routines. // -// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the +// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the // data reside in L1 cache, i.e. 2 ticks away). It's possible to // compress the epilogue and get down to 2*n+6, but at the cost of // scalability (the neat feature of this implementation is that it @@ -500,7 +503,7 @@ bn_sqr_words: // possible to compress the epilogue (I'm getting tired to write this // comment over and over) and get down to 2*n+16 at the cost of // scalability. The decision will very likely be reconsidered after the -// benchmark program is profiled. I.e. if perfomance gain on Itanium +// benchmark program is profiled. I.e. if performance gain on Itanium // will appear larger than loss on "wider" IA-64, then the loop should // be explicitly split and the epilogue compressed. .L_bn_sqr_words_ctop: @@ -936,7 +939,7 @@ bn_mul_comba8: xma.hu f118=f39,f127,f117 } { .mfi; xma.lu f117=f39,f127,f117 };;// //-------------------------------------------------// -// Leaving muliplier's heaven... Quite a ride, huh? +// Leaving multiplier's heaven... Quite a ride, huh? { .mii; getf.sig r31=f47 add r25=r25,r24 @@ -1428,6 +1431,7 @@ bn_div_words: mov ar.ec=0 // don't rotate at exit mov pr.rot=0 } { .mii; mov L=r33 // save l + mov r25=r0 // needed if abort is called on VMS mov r36=r0 };; .L_divw_shift: // -vv- note signed comparison @@ -1529,9 +1533,8 @@ bn_div_words: // output: f8 = (int)(a/b) // clobbered: f8,f9,f10,f11,pred pred=p15 -// One can argue that this snippet is copyrighted to Intel -// Corporation, as it's essentially identical to one of those -// found in "Divide, Square Root and Remainder" section at +// This snippet is based on text found in the "Divide, Square +// Root and Remainder" section at // http://www.intel.com/software/products/opensource/libraries/num.htm. // Yes, I admit that the referred code was used as template, // but after I realized that there hardly is any other instruction diff --git a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl index e141e1a925..fbe5d04f71 100644 --- a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl @@ -56,14 +56,14 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $REG_S="sd"; $REG_L="ld"; $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $REG_S="sw"; $REG_L="lw"; $SZREG=4; @@ -121,6 +121,8 @@ $m1=$s11; $FRAMESIZE=14; $code=<<___; +#include "mips_arch.h" + .text .set noat @@ -183,27 +185,27 @@ $code.=<<___; $PTR_SUB $sp,$num and $sp,$at - $MULTU $aj,$bi - $LD $alo,$BNSZ($ap) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $MULTU $lo0,$n0 - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - $MULTU $nlo,$m1 + $MULTU ($aj,$bi) + $LD $ahi,$BNSZ($ap) + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) + $MULTU ($lo0,$n0) + mflo ($m1,$lo0,$n0) + + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) + + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -215,25 +217,25 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 - $MULTU $nj,$m1 + $MULTU ($nj,$m1) $ADDU $hi1,$at addu $j,$BNSZ $ST $lo1,($tp) sltu $t0,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) bnez $t0,.L1st $PTR_ADD $tp,$BNSZ @@ -263,34 +265,34 @@ $code.=<<___; $PTR_ADD $bi,$bp,$i $LD $bi,($bi) $LD $aj,($ap) - $LD $alo,$BNSZ($ap) + $LD $ahi,$BNSZ($ap) $LD $tj,($sp) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $LD $nj,($np) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) $ADDU $lo0,$tj - $MULTU $lo0,$n0 + $MULTU ($lo0,$n0) sltu $at,$lo0,$tj $ADDU $hi0,$at - mflo $m1 + mflo ($m1,$lo0,$n0) - $MULTU $alo,$bi - mflo $alo - mfhi $ahi + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) - $MULTU $nlo,$m1 + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -303,19 +305,19 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo0,$tj addu $j,$BNSZ - $MULTU $nj,$m1 + $MULTU ($nj,$m1) sltu $at,$lo0,$tj $ADDU $lo1,$lo0 $ADDU $hi0,$at @@ -323,8 +325,8 @@ $code.=<<___; $LD $tj,2*$BNSZ($tp) $ADDU $hi1,$t0 sltu $at,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) $ST $lo1,($tp) bnez $at,.Linner $PTR_ADD $tp,$BNSZ diff --git a/deps/openssl/openssl/crypto/bn/asm/mips.pl b/deps/openssl/openssl/crypto/bn/asm/mips.pl index 420f01f3a4..da35ec1b30 100644 --- a/deps/openssl/openssl/crypto/bn/asm/mips.pl +++ b/deps/openssl/openssl/crypto/bn/asm/mips.pl @@ -8,7 +8,7 @@ # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. # # Rights for redistribution and usage in source and binary forms are @@ -42,7 +42,7 @@ # Performance improvement is astonishing! 'apps/openssl speed rsa dsa' # goes way over 3 times faster! # -# <appro@fy.chalmers.se> +# <appro@openssl.org> # October 2010 # @@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i); $minus4=$v1; $code.=<<___; +#include "mips_arch.h" + +#if defined(_MIPS_ARCH_MIPS64R6) +# define ddivu(rs,rt) +# define mfqt(rd,rs,rt) ddivu rd,rs,rt +# define mfrm(rd,rs,rt) dmodu rd,rs,rt +#elif defined(_MIPS_ARCH_MIPS32R6) +# define divu(rs,rt) +# define mfqt(rd,rs,rt) divu rd,rs,rt +# define mfrm(rd,rs,rt) modu rd,rs,rt +#else +# define $DIVU(rs,rt) $DIVU $zero,rs,rt +# define mfqt(rd,rs,rt) mflo rd +# define mfrm(rd,rs,rt) mfhi rd +#endif + .rdata .asciiz "mips3.s, Version 1.2" .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" @@ -151,7 +167,7 @@ $code.=<<___; .L_bn_mul_add_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) $LD $t2,$BNSZ($a1) $LD $t3,$BNSZ($a0) @@ -161,11 +177,11 @@ $code.=<<___; sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit # values", but it seems to work fine # even on 64-bit registers. - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 - $MULTU $t2,$a3 + $MULTU ($t2,$a3) sltu $at,$t1,$at $ST $t1,0($a0) $ADDU $v0,$at @@ -174,11 +190,11 @@ $code.=<<___; $LD $ta3,3*$BNSZ($a0) $ADDU $t3,$v0 sltu $v0,$t3,$v0 - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $t3,$at $ADDU $v0,$t2 - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) sltu $at,$t3,$at $ST $t3,$BNSZ($a0) $ADDU $v0,$at @@ -188,11 +204,11 @@ $code.=<<___; $PTR_ADD $a1,4*$BNSZ $ADDU $ta1,$v0 sltu $v0,$ta1,$v0 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $ta1,$at $ADDU $v0,$ta0 - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) sltu $at,$ta1,$at $ST $ta1,-2*$BNSZ($a0) $ADDU $v0,$at @@ -201,8 +217,8 @@ $code.=<<___; and $ta0,$a2,$minus4 $ADDU $ta3,$v0 sltu $v0,$ta3,$v0 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $ta3,$at $ADDU $v0,$ta2 sltu $at,$ta3,$at @@ -217,13 +233,13 @@ $code.=<<___; .L_bn_mul_add_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -232,13 +248,13 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,$BNSZ($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -247,12 +263,12 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,2*$BNSZ($a0) $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -310,40 +326,40 @@ $code.=<<___; .L_bn_mul_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at - $MULTU $t2,$a3 + $MULTU ($t2,$a3) $ST $v0,0($a0) $ADDU $v0,$t1,$t0 subu $a2,4 $PTR_ADD $a0,4*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $v0,$at sltu $t3,$v0,$at - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) $ST $v0,-3*$BNSZ($a0) $ADDU $v0,$t3,$t2 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $v0,$at sltu $ta1,$v0,$at - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) $ST $v0,-2*$BNSZ($a0) $ADDU $v0,$ta1,$ta0 and $ta0,$a2,$minus4 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) @@ -357,10 +373,10 @@ $code.=<<___; .L_bn_mul_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,0($a0) @@ -368,10 +384,10 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,$BNSZ($a0) @@ -379,9 +395,9 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - mflo $at - mfhi $t0 + $MULTU ($t0,$a3) + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,2*$BNSZ($a0) @@ -438,35 +454,35 @@ $code.=<<___; .L_bn_sqr_words_loop: $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) - $MULTU $t2,$t2 + $MULTU ($t2,$t2) subu $a2,4 $PTR_ADD $a0,8*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $t3 - mfhi $t2 + mflo ($t3,$t2,$t2) + mfhi ($t2,$t2,$t2) $ST $t3,-6*$BNSZ($a0) $ST $t2,-5*$BNSZ($a0) - $MULTU $ta0,$ta0 - mflo $ta1 - mfhi $ta0 + $MULTU ($ta0,$ta0) + mflo ($ta1,$ta0,$ta0) + mfhi ($ta0,$ta0,$ta0) $ST $ta1,-4*$BNSZ($a0) $ST $ta0,-3*$BNSZ($a0) - $MULTU $ta2,$ta2 + $MULTU ($ta2,$ta2) and $ta0,$a2,$minus4 - mflo $ta3 - mfhi $ta2 + mflo ($ta3,$ta2,$ta2) + mfhi ($ta2,$ta2,$ta2) $ST $ta3,-2*$BNSZ($a0) .set noreorder @@ -479,27 +495,27 @@ $code.=<<___; .L_bn_sqr_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,2*$BNSZ($a0) $ST $t0,3*$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$t0 - mflo $t1 - mfhi $t0 + $MULTU ($t0,$t0) + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,4*$BNSZ($a0) $ST $t0,5*$BNSZ($a0) @@ -587,13 +603,13 @@ $code.=<<___; sltu $v0,$t2,$ta2 $ST $t2,-2*$BNSZ($a0) $ADDU $v0,$t8 - + $ADDU $ta3,$t3 sltu $t9,$ta3,$t3 $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - + .set noreorder bgtz $at,.L_bn_add_words_loop $ADDU $v0,$t9 @@ -792,7 +808,7 @@ bn_div_3_words: # so that we can save two arguments # and return address in registers # instead of stack:-) - + $LD $a0,($a3) move $ta2,$a1 bne $a0,$a2,bn_div_3_words_internal @@ -823,11 +839,11 @@ $code.=<<___; move $ta3,$ra bal bn_div_words_internal move $ra,$ta3 - $MULTU $ta2,$v0 + $MULTU ($ta2,$v0) $LD $t2,-2*$BNSZ($a3) move $ta0,$zero - mfhi $t1 - mflo $t0 + mfhi ($t1,$ta2,$v0) + mflo ($t0,$ta2,$v0) sltu $t8,$t1,$a1 .L_bn_div_3_words_inner_loop: bnez $t8,.L_bn_div_3_words_inner_loop_done @@ -930,15 +946,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div1 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div1: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop1: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -963,15 +979,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div2 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div2: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop2: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -1070,592 +1086,592 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) $LD $b_4,4*$BNSZ($a2) $LD $b_5,5*$BNSZ($a2) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at $LD $b_6,6*$BNSZ($a2) $LD $b_7,7*$BNSZ($a2) $ST $c_1,0($a0) # r[0]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) # r[1]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) # r[2]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); + $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) # r[3]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_0) + mfhi ($t_2,$a_4,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); + $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_4) + mfhi ($t_2,$a_0,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) # r[4]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_5) + mfhi ($t_2,$a_0,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); + $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_4) + mfhi ($t_2,$a_1,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); + $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_1) + mfhi ($t_2,$a_4,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); + $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_0) + mfhi ($t_2,$a_5,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); + $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) # r[5]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_0) + mfhi ($t_2,$a_6,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); + $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_1) + mfhi ($t_2,$a_5,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); + $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_2) + mfhi ($t_2,$a_4,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); + $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_4) + mfhi ($t_2,$a_2,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); + $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_5) + mfhi ($t_2,$a_1,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); + $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_6) + mfhi ($t_2,$a_0,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$b_7) # mul_add_c(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) # r[6]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_7) + mfhi ($t_2,$a_0,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); + $MULTU ($a_1,$b_6) # mul_add_c(a[1],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_6) + mfhi ($t_2,$a_1,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); + $MULTU ($a_2,$b_5) # mul_add_c(a[2],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_5) + mfhi ($t_2,$a_2,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); + $MULTU ($a_3,$b_4) # mul_add_c(a[3],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_4) + mfhi ($t_2,$a_3,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); + $MULTU ($a_4,$b_3) # mul_add_c(a[4],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_3) + mfhi ($t_2,$a_4,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); + $MULTU ($a_5,$b_2) # mul_add_c(a[5],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_2) + mfhi ($t_2,$a_5,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); + $MULTU ($a_6,$b_1) # mul_add_c(a[6],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_1) + mfhi ($t_2,$a_6,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); + $MULTU ($a_7,$b_0) # mul_add_c(a[7],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_0) + mfhi ($t_2,$a_7,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); + $MULTU ($a_7,$b_1) # mul_add_c(a[7],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,7*$BNSZ($a0) # r[7]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_1) + mfhi ($t_2,$a_7,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); + $MULTU ($a_6,$b_2) # mul_add_c(a[6],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_2) + mfhi ($t_2,$a_6,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); + $MULTU ($a_5,$b_3) # mul_add_c(a[5],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_3) + mfhi ($t_2,$a_5,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); + $MULTU ($a_4,$b_4) # mul_add_c(a[4],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_4) + mfhi ($t_2,$a_4,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); + $MULTU ($a_3,$b_5) # mul_add_c(a[3],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_5) + mfhi ($t_2,$a_3,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); + $MULTU ($a_2,$b_6) # mul_add_c(a[2],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_6) + mfhi ($t_2,$a_2,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); + $MULTU ($a_1,$b_7) # mul_add_c(a[1],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_7) + mfhi ($t_2,$a_1,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$b_7) # mul_add_c(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) # r[8]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_7) + mfhi ($t_2,$a_2,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); + $MULTU ($a_3,$b_6) # mul_add_c(a[3],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_6) + mfhi ($t_2,$a_3,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); + $MULTU ($a_4,$b_5) # mul_add_c(a[4],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_5) + mfhi ($t_2,$a_4,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); + $MULTU ($a_5,$b_4) # mul_add_c(a[5],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_4) + mfhi ($t_2,$a_5,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); + $MULTU ($a_6,$b_3) # mul_add_c(a[6],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_3) + mfhi ($t_2,$a_6,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); + $MULTU ($a_7,$b_2) # mul_add_c(a[7],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_2) + mfhi ($t_2,$a_7,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); + $MULTU ($a_7,$b_3) # mul_add_c(a[7],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,9*$BNSZ($a0) # r[9]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_3) + mfhi ($t_2,$a_7,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); + $MULTU ($a_6,$b_4) # mul_add_c(a[6],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_4) + mfhi ($t_2,$a_6,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); + $MULTU ($a_5,$b_5) # mul_add_c(a[5],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_5) + mfhi ($t_2,$a_5,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); + $MULTU ($a_4,$b_6) # mul_add_c(a[4],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_6) + mfhi ($t_2,$a_4,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); + $MULTU ($a_3,$b_7) # mul_add_c(a[3],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_7) + mfhi ($t_2,$a_3,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$b_7) # mul_add_c(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) # r[10]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_7) + mfhi ($t_2,$a_4,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); + $MULTU ($a_5,$b_6) # mul_add_c(a[5],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_6) + mfhi ($t_2,$a_5,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); + $MULTU ($a_6,$b_5) # mul_add_c(a[6],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_5) + mfhi ($t_2,$a_6,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); + $MULTU ($a_7,$b_4) # mul_add_c(a[7],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_4) + mfhi ($t_2,$a_7,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); + $MULTU ($a_7,$b_5) # mul_add_c(a[7],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,11*$BNSZ($a0) # r[11]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_5) + mfhi ($t_2,$a_7,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); + $MULTU ($a_6,$b_6) # mul_add_c(a[6],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_6) + mfhi ($t_2,$a_6,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); + $MULTU ($a_5,$b_7) # mul_add_c(a[5],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_7) + mfhi ($t_2,$a_5,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$b_7) # mul_add_c(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) # r[12]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_7) + mfhi ($t_2,$a_6,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); + $MULTU ($a_7,$b_6) # mul_add_c(a[7],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_6) + mfhi ($t_2,$a_7,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); + $MULTU ($a_7,$b_7) # mul_add_c(a[7],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,13*$BNSZ($a0) # r[13]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_7) + mfhi ($t_2,$a_7,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -1716,144 +1732,144 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $ST $c_1,0($a0) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at @@ -1888,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2, # commented as "forward multiplication" below]; )=@_; $code.=<<___; - mflo $lo - mfhi $hi $ADDU $c0,$lo sltu $at,$c0,$lo - $MULTU $an,$bn # forward multiplication + $MULTU ($an,$bn) # forward multiplication $ADDU $c0,$lo $ADDU $at,$hi sltu $lo,$c0,$lo @@ -1902,15 +1916,17 @@ ___ $code.=<<___ if (!$warm); sltu $c2,$c1,$at $ADDU $c1,$hi - sltu $hi,$c1,$hi - $ADDU $c2,$hi ___ $code.=<<___ if ($warm); sltu $at,$c1,$at $ADDU $c1,$hi $ADDU $c2,$at +___ +$code.=<<___; sltu $hi,$c1,$hi $ADDU $c2,$hi + mflo ($lo,$an,$bn) + mfhi ($hi,$an,$bn) ___ } @@ -1940,21 +1956,21 @@ $code.=<<___; $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -1962,20 +1978,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); @@ -1989,16 +2007,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$a_5) # mul_add_c2(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_0,$a_5) + mfhi ($t_2,$a_0,$a_5) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); @@ -2016,16 +2034,16 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$a_7) # mul_add_c2(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) + mflo ($t_1,$a_0,$a_7) + mfhi ($t_2,$a_0,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); @@ -2045,16 +2063,16 @@ ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$a_7) # mul_add_c2(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) + mflo ($t_1,$a_2,$a_7) + mfhi ($t_2,$a_2,$a_7) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); @@ -2070,16 +2088,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$a_7) # mul_add_c2(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) + mflo ($t_1,$a_4,$a_7) + mfhi ($t_2,$a_4,$a_7) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); @@ -2091,24 +2109,22 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$a_7) # mul_add_c2(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) + mflo ($t_1,$a_6,$a_7) + mfhi ($t_2,$a_6,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); $code.=<<___; $ST $c_2,13*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -2152,19 +2168,19 @@ $code.=<<___; .set reorder $LD $a_0,0($a1) $LD $a_1,$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -2172,20 +2188,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); @@ -2197,24 +2215,22 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$a_3) # mul_add_c2(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_2,$a_3) + mfhi ($t_2,$a_2,$a_3) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s deleted file mode 100644 index 413eac7123..0000000000 --- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s +++ /dev/null @@ -1,1624 +0,0 @@ -; Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. -; -; Licensed under the OpenSSL license (the "License"). You may not use -; this file except in compliance with the License. You can obtain a copy -; in the file LICENSE in the source distribution or at -; https://www.openssl.org/source/license.html -; -; PA-RISC 2.0 implementation of bn_asm code, based on the -; 64-bit version of the code. This code is effectively the -; same as the 64-bit version except the register model is -; slightly different given all values must be 32-bit between -; function calls. Thus the 64-bit return values are returned -; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit -; -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0N - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 32-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28,r29 (ret0,ret1) -; Stack pointer ; r30 (sp) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -n .reg %r23 - -; -; Note that the "w" argument for bn_mul_add_words and bn_mul_words -; is passed on the stack at a delta of -56 from the top of stack -; as the routine is entered. -; - -; -; Globals used in some routines -; - -top_overflow .reg %r23 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg3 = num -; -56(sp) = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - NOP - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - - STD %r8,40(%sp) ; save r8 - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret1 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret1,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg3 = num -; w on stack at -56(sp) - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret1 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret1 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret1,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret1 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret1,t ; t = t+c; - ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret1,%r0,%ret1 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret1 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret1 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just output from the HP C compiler. -; -;------------------------------------------------------------------------------ -bn_div_words - .PROC - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN - .IMPORT BN_num_bits_word,CODE - ;--- not PIC .IMPORT __iob,DATA - ;--- not PIC .IMPORT fprintf,CODE - .IMPORT abort,CODE - .IMPORT $$div2U,MILLICODE - .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .ENTRY - STW %r2,-20(%r30) ;offset 0x8ec - STW,MA %r3,192(%r30) ;offset 0x8f0 - STW %r4,-188(%r30) ;offset 0x8f4 - DEPD %r5,31,32,%r6 ;offset 0x8f8 - STD %r6,-184(%r30) ;offset 0x8fc - DEPD %r7,31,32,%r8 ;offset 0x900 - STD %r8,-176(%r30) ;offset 0x904 - STW %r9,-168(%r30) ;offset 0x908 - LDD -248(%r30),%r3 ;offset 0x90c - COPY %r26,%r4 ;offset 0x910 - COPY %r24,%r5 ;offset 0x914 - DEPD %r25,31,32,%r4 ;offset 0x918 - CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c - DEPD %r23,31,32,%r5 ;offset 0x920 - MOVIB,TR -1,%r29,$00060002 ;offset 0x924 - EXTRD,U %r29,31,32,%r28 ;offset 0x928 -$0006002A - LDO -1(%r29),%r29 ;offset 0x92c - SUB %r23,%r7,%r23 ;offset 0x930 -$00060024 - SUB %r4,%r31,%r25 ;offset 0x934 - AND %r25,%r19,%r26 ;offset 0x938 - CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c - DEPD,Z %r25,31,32,%r20 ;offset 0x940 - OR %r20,%r24,%r21 ;offset 0x944 - CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 - SUB %r31,%r2,%r31 ;offset 0x94c -$00060046 -$0006002E - DEPD,Z %r23,31,32,%r25 ;offset 0x950 - EXTRD,U %r23,31,32,%r26 ;offset 0x954 - AND %r25,%r19,%r24 ;offset 0x958 - ADD,L %r31,%r26,%r31 ;offset 0x95c - CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 - LDO 1(%r31),%r31 ;offset 0x964 -$00060032 - CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 - LDO -1(%r29),%r29 ;offset 0x96c - ADD,L %r4,%r3,%r4 ;offset 0x970 -$00060036 - ADDIB,=,N -1,%r8,$D0 ;offset 0x974 - SUB %r5,%r24,%r28 ;offset 0x978 -$0006003A - SUB %r4,%r31,%r24 ;offset 0x97c - SHRPD %r24,%r28,32,%r4 ;offset 0x980 - DEPD,Z %r29,31,32,%r9 ;offset 0x984 - DEPD,Z %r28,31,32,%r5 ;offset 0x988 -$0006001C - EXTRD,U %r4,31,32,%r31 ;offset 0x98c - CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 - MOVB,TR %r6,%r29,$D1 ;offset 0x994 - STD %r29,-152(%r30) ;offset 0x998 -$0006000C - EXTRD,U %r3,31,32,%r25 ;offset 0x99c - COPY %r3,%r26 ;offset 0x9a0 - EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 - EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 - .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; - B,L BN_num_bits_word,%r2 ;offset 0x9ac - EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 - LDI 64,%r20 ;offset 0x9b4 - DEPD %r7,31,32,%r5 ;offset 0x9b8 - DEPD %r8,31,32,%r4 ;offset 0x9bc - DEPD %r9,31,32,%r3 ;offset 0x9c0 - CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 - COPY %r28,%r24 ;offset 0x9c8 - MTSARCM %r24 ;offset 0x9cc - DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 - CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 -$00060012 - SUBI 64,%r24,%r31 ;offset 0x9d8 - CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc - SUB %r4,%r3,%r4 ;offset 0x9e0 -$00060016 - CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 - COPY %r0,%r9 ;offset 0x9e8 - MTSARCM %r31 ;offset 0x9ec - DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 - SUBI 64,%r31,%r26 ;offset 0x9f4 - MTSAR %r26 ;offset 0x9f8 - SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc - MTSARCM %r31 ;offset 0xa00 - DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 -$0006001A - DEPDI,Z -1,31,32,%r19 ;offset 0xa08 - AND %r3,%r19,%r29 ;offset 0xa0c - EXTRD,U %r29,31,32,%r2 ;offset 0xa10 - DEPDI,Z -1,63,32,%r6 ;offset 0xa14 - MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 - EXTRD,U %r3,63,32,%r7 ;offset 0xa1c -$D2 - ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 - ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 - ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 - ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; - ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c - ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 - .CALL ; - B,L abort,%r2 ;offset 0xa34 - NOP ;offset 0xa38 - B $D3 ;offset 0xa3c - LDW -212(%r30),%r2 ;offset 0xa40 -$00060020 - COPY %r4,%r26 ;offset 0xa44 - EXTRD,U %r4,31,32,%r25 ;offset 0xa48 - COPY %r2,%r24 ;offset 0xa4c - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r31 ;offset 0xa50 - EXTRD,U %r2,31,32,%r23 ;offset 0xa54 - DEPD %r28,31,32,%r29 ;offset 0xa58 -$00060022 - STD %r29,-152(%r30) ;offset 0xa5c -$D1 - AND %r5,%r19,%r24 ;offset 0xa60 - EXTRD,U %r24,31,32,%r24 ;offset 0xa64 - STW %r2,-160(%r30) ;offset 0xa68 - STW %r7,-128(%r30) ;offset 0xa6c - FLDD -152(%r30),%fr4 ;offset 0xa70 - FLDD -152(%r30),%fr7 ;offset 0xa74 - FLDW -160(%r30),%fr8L ;offset 0xa78 - FLDW -128(%r30),%fr5L ;offset 0xa7c - XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 - FSTD %fr10,-136(%r30) ;offset 0xa84 - XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 - FSTD %fr22,-144(%r30) ;offset 0xa8c - XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 - XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 - FSTD %fr11,-112(%r30) ;offset 0xa98 - FSTD %fr23,-120(%r30) ;offset 0xa9c - LDD -136(%r30),%r28 ;offset 0xaa0 - DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 - LDD -144(%r30),%r20 ;offset 0xaa8 - ADD,L %r20,%r31,%r31 ;offset 0xaac - LDD -112(%r30),%r22 ;offset 0xab0 - DEPD,Z %r22,31,32,%r22 ;offset 0xab4 - LDD -120(%r30),%r21 ;offset 0xab8 - B $00060024 ;offset 0xabc - ADD,L %r21,%r22,%r23 ;offset 0xac0 -$D0 - OR %r9,%r29,%r29 ;offset 0xac4 -$00060040 - EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -$00060002 -$L2 - LDW -212(%r30),%r2 ;offset 0xacc -$D3 - LDW -168(%r30),%r9 ;offset 0xad0 - LDD -176(%r30),%r8 ;offset 0xad4 - EXTRD,U %r8,31,32,%r7 ;offset 0xad8 - LDD -184(%r30),%r6 ;offset 0xadc - EXTRD,U %r6,31,32,%r5 ;offset 0xae0 - LDW -188(%r30),%r4 ;offset 0xae4 - BVE (%r2) ;offset 0xae8 - .EXIT - LDW,MB -192(%r30),%r3 ;offset 0xaec - .PROCEND ;in=23,25;out=28,29;fpin=105,107; - - - - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SPACE $PRIVATE$,SORT=16 -;--- not PIC .IMPORT $global$,DATA -;--- not PIC .SPACE $TEXT$ -;--- not PIC .SUBSPA $CODE$ -;--- not PIC .SUBSPA $LIT$,ACCESS=0x2c -;--- not PIC C$7 -;--- not PIC .ALIGN 8 -;--- not PIC .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s deleted file mode 100644 index 97381172e7..0000000000 --- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s +++ /dev/null @@ -1,1612 +0,0 @@ -; Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved. -; -; Licensed under the OpenSSL license (the "License"). You may not use -; this file except in compliance with the License. You can obtain a copy -; in the file LICENSE in the source distribution or at -; https://www.openssl.org/source/license.html - -; -; PA-RISC 64-bit implementation of bn_asm code -; -; This code is approximately 2x faster than the C version -; for RSA/DSA. -; -; See http://devresource.hp.com/ for more details on the PA-RISC -; architecture. Also see the book "PA-RISC 2.0 Architecture" -; by Gerry Kane for information on the instruction set architecture. -; -; Code written by Chris Ruemmler (with some help from the HP C -; compiler). -; -; The code compiles with HP's assembler -; - - .level 2.0W - .space $TEXT$ - .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY - -; -; Global Register definitions used for the routines. -; -; Some information about HP's runtime architecture for 64-bits. -; -; "Caller save" means the calling function must save the register -; if it wants the register to be preserved. -; "Callee save" means if a function uses the register, it must save -; the value before using it. -; -; For the floating point registers -; -; "caller save" registers: fr4-fr11, fr22-fr31 -; "callee save" registers: fr12-fr21 -; "special" registers: fr0-fr3 (status and exception registers) -; -; For the integer registers -; value zero : r0 -; "caller save" registers: r1,r19-r26 -; "callee save" registers: r3-r18 -; return register : r2 (rp) -; return values ; r28 (ret0,ret1) -; Stack pointer ; r30 (sp) -; global data pointer ; r27 (dp) -; argument pointer ; r29 (ap) -; millicode return ptr ; r31 (also a caller save register) - - -; -; Arguments to the routines -; -r_ptr .reg %r26 -a_ptr .reg %r25 -b_ptr .reg %r24 -num .reg %r24 -w .reg %r23 -n .reg %r23 - - -; -; Globals used in some routines -; - -top_overflow .reg %r29 -high_mask .reg %r22 ; value 0xffffffff80000000L - - -;------------------------------------------------------------------------------ -; -; bn_mul_add_words -; -;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, -; int num, BN_ULONG w) -; -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = num -; arg3 = w -; -; Local register definitions -; - -fm1 .reg %fr22 -fm .reg %fr23 -ht_temp .reg %fr24 -ht_temp_1 .reg %fr25 -lt_temp .reg %fr26 -lt_temp_1 .reg %fr27 -fm1_1 .reg %fr28 -fm_1 .reg %fr29 - -fw_h .reg %fr7L -fw_l .reg %fr7R -fw .reg %fr7 - -fht_0 .reg %fr8L -flt_0 .reg %fr8R -t_float_0 .reg %fr8 - -fht_1 .reg %fr9L -flt_1 .reg %fr9R -t_float_1 .reg %fr9 - -tmp_0 .reg %r31 -tmp_1 .reg %r21 -m_0 .reg %r20 -m_1 .reg %r19 -ht_0 .reg %r1 -ht_1 .reg %r3 -lt_0 .reg %r4 -lt_1 .reg %r5 -m1_0 .reg %r6 -m1_1 .reg %r7 -rp_val .reg %r8 -rp_val_1 .reg %r9 - -bn_mul_add_words - .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN - .proc - .callinfo frame=128 - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP ; Needed to make the loop 16-byte aligned - NOP ; Needed to make the loop 16-byte aligned - - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - STD %r7,32(%sp) ; save r7 - STD %r8,40(%sp) ; save r8 - - STD %r9,48(%sp) ; save r9 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; store w on stack - - CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit - LDO 128(%sp),%sp ; bump stack - - ; - ; The loop is unrolled twice, so if there is only 1 number - ; then go straight to the cleanup code. - ; - CMPIB,= 1,num,bn_mul_add_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_add_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDD 8(r_ptr),rp_val_1 ; rp[1] - - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1[0] - FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] - - XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h - XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m[0] - FSTD fm_1,-40(%sp) ; -40(sp) = m[1] - - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 - - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 - - LDD -8(%sp),m_0 ; m[0] - LDD -40(%sp),m_1 ; m[1] - LDD -16(%sp),m1_0 ; m1[0] - LDD -48(%sp),m1_1 ; m1[1] - - LDD -24(%sp),ht_0 ; ht[0] - LDD -56(%sp),ht_1 ; ht[1] - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; - - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) - ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) - - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) - ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) - EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 - - EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 - ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) - ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) - - ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - - ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - - LDO -2(num),num ; num = num - 2; - ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - - ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] - ADD,DC ht_1,%r0,%ret0 ; ht[1]++ - LDO 16(a_ptr),a_ptr ; a_ptr += 2 - - STD lt_1,8(r_ptr) ; rp[1] = lt[1] - CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do - LDO 16(r_ptr),r_ptr ; r_ptr += 2 - - CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_add_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - LDD 0(r_ptr),rp_val ; rp[0] - LDO 8(a_ptr),a_ptr ; a_ptr++ - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 ; m1 = temp1 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD %ret0,tmp_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] - ADD,DC ht_0,%r0,%ret0 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_add_words_exit - .EXIT - LDD -80(%sp),%r9 ; restore r9 - LDD -88(%sp),%r8 ; restore r8 - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; arg3 = w - -bn_mul_words - .proc - .callinfo frame=128 - .entry - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - STD %r7,32(%sp) ; save r7 - COPY %r0,%ret0 ; return 0 by default - DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 - STD w,56(%sp) ; w on stack - - CMPIB,>= 0,num,bn_mul_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; See if only 1 word to do, thus just do cleanup - ; - CMPIB,= 1,num,bn_mul_words_single_top - FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus - ; two 32-bit mutiplies can be issued per cycle. - ; -bn_mul_words_unroll2 - - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) - XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l - XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l - - FSTD fm1,-16(%sp) ; -16(sp) = m1 - FSTD fm1_1,-48(%sp) ; -48(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h - - FSTD fm,-8(%sp) ; -8(sp) = m - FSTD fm_1,-40(%sp) ; -40(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h - XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h - - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l - - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt - LDD -8(%sp),m_0 - LDD -40(%sp),m_1 - - LDD -16(%sp),m1_0 - LDD -48(%sp),m1_1 - LDD -24(%sp),ht_0 - LDD -56(%sp),ht_1 - - ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; - ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; - LDD -32(%sp),lt_0 - LDD -64(%sp),lt_1 - - CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) - ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - EXTRD,U tmp_1,31,32,m_1 ; m>>32 - DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt = lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD lt_1,m1_1,lt_1 ; lt = lt+m1; - ADD,DC ht_1,%r0,ht_1 ; ht++ - ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) - ADD,DC ht_1,%r0,ht_1 ; ht++ - STD lt_0,0(r_ptr) ; rp[0] = lt - STD lt_1,8(r_ptr) ; rp[1] = lt - - COPY ht_1,%ret0 ; carry = ht - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_mul_words_unroll2 - LDO 16(r_ptr),r_ptr ; rp++ - - CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_mul_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l - FSTD fm1,-16(%sp) ; -16(sp) = m1 - XMPYU flt_0,fw_h,fm ; m = lt*fw_h - FSTD fm,-8(%sp) ; -8(sp) = m - XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h - FSTD ht_temp,-24(%sp) ; -24(sp) = ht - XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l - FSTD lt_temp,-32(%sp) ; -32(sp) = lt - - LDD -8(%sp),m_0 - LDD -16(%sp),m1_0 - ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; - LDD -24(%sp),ht_0 - LDD -32(%sp),lt_0 - - CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) - ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) - - EXTRD,U tmp_0,31,32,m_0 ; m>>32 - DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 - - ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) - ADD lt_0,m1_0,lt_0 ; lt= lt+m1; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - ADD %ret0,lt_0,lt_0 ; lt = lt + c; - ADD,DC ht_0,%r0,ht_0 ; ht++ - - COPY ht_0,%ret0 ; copy carry - STD lt_0,0(r_ptr) ; rp[0] = lt - -bn_mul_words_exit - .EXIT - LDD -96(%sp),%r7 ; restore r7 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 ; restore r3 - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) -; -; arg0 = rp -; arg1 = ap -; arg2 = num -; - -bn_sqr_words - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - NOP - STD %r5,16(%sp) ; save r5 - - CMPIB,>= 0,num,bn_sqr_words_exit - LDO 128(%sp),%sp ; bump stack - - ; - ; If only 1, the goto straight to cleanup - ; - CMPIB,= 1,num,bn_sqr_words_single_top - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; - -bn_sqr_words_unroll2 - FLDD 0(a_ptr),t_float_0 ; a[0] - FLDD 8(a_ptr),t_float_1 ; a[1] - XMPYU fht_0,flt_0,fm ; m[0] - XMPYU fht_1,flt_1,fm_1 ; m[1] - - FSTD fm,-24(%sp) ; store m[0] - FSTD fm_1,-56(%sp) ; store m[1] - XMPYU flt_0,flt_0,lt_temp ; lt[0] - XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] - - FSTD lt_temp,-16(%sp) ; store lt[0] - FSTD lt_temp_1,-48(%sp) ; store lt[1] - XMPYU fht_0,fht_0,ht_temp ; ht[0] - XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] - - FSTD ht_temp,-8(%sp) ; store ht[0] - FSTD ht_temp_1,-40(%sp) ; store ht[1] - LDD -24(%sp),m_0 - LDD -56(%sp),m_1 - - AND m_0,high_mask,tmp_0 ; m[0] & Mask - AND m_1,high_mask,tmp_1 ; m[1] & Mask - DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 - DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 - - LDD -16(%sp),lt_0 - LDD -48(%sp),lt_1 - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 - EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 - - LDD -8(%sp),ht_0 - LDD -40(%sp),ht_1 - ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 - ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 - - ADD lt_0,m_0,lt_0 ; lt = lt+m - ADD,DC ht_0,%r0,ht_0 ; ht[0]++ - STD lt_0,0(r_ptr) ; rp[0] = lt[0] - STD ht_0,8(r_ptr) ; rp[1] = ht[1] - - ADD lt_1,m_1,lt_1 ; lt = lt+m - ADD,DC ht_1,%r0,ht_1 ; ht[1]++ - STD lt_1,16(r_ptr) ; rp[2] = lt[1] - STD ht_1,24(r_ptr) ; rp[3] = ht[1] - - LDO -2(num),num ; num = num - 2; - LDO 16(a_ptr),a_ptr ; ap += 2 - CMPIB,<= 2,num,bn_sqr_words_unroll2 - LDO 32(r_ptr),r_ptr ; rp += 4 - - CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? - - ; - ; Top of loop aligned on 64-byte boundary - ; -bn_sqr_words_single_top - FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) - - XMPYU fht_0,flt_0,fm ; m - FSTD fm,-24(%sp) ; store m - - XMPYU flt_0,flt_0,lt_temp ; lt - FSTD lt_temp,-16(%sp) ; store lt - - XMPYU fht_0,fht_0,ht_temp ; ht - FSTD ht_temp,-8(%sp) ; store ht - - LDD -24(%sp),m_0 ; load m - AND m_0,high_mask,tmp_0 ; m & Mask - DEPD,Z m_0,30,31,m_0 ; m << 32+1 - LDD -16(%sp),lt_0 ; lt - - LDD -8(%sp),ht_0 ; ht - EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 - ADD m_0,lt_0,lt_0 ; lt = lt+m - ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 - ADD,DC ht_0,%r0,ht_0 ; ht++ - - STD lt_0,0(r_ptr) ; rp[0] = lt - STD ht_0,8(r_ptr) ; rp[1] = ht - -bn_sqr_words_exit - .EXIT - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - .PROCEND ;in=23,24,25,26,29;out=28; - - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t .reg %r22 -b .reg %r21 -l .reg %r20 - -bn_add_words - .proc - .entry - .callinfo - .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .align 64 - - CMPIB,>= 0,n,bn_add_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_add_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_add_words_unroll2 - LDD 0(a_ptr),t - LDD 0(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - - LDD 8(a_ptr),t - LDD 8(b_ptr),b - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_add_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_add_words_exit ; are we done? - -bn_add_words_single_top - LDD 0(a_ptr),t - LDD 0(b_ptr),b - - ADD t,%ret0,t ; t = t+c; - ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) - ADD t,b,l ; l = t + b[0] - ADD,DC %ret0,%r0,%ret0 ; c+= carry - STD l,0(r_ptr) - -bn_add_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) -; -; arg0 = rp -; arg1 = ap -; arg2 = bp -; arg3 = n - -t1 .reg %r22 -t2 .reg %r21 -sub_tmp1 .reg %r20 -sub_tmp2 .reg %r19 - - -bn_sub_words - .proc - .callinfo - .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - CMPIB,>= 0,n,bn_sub_words_exit - COPY %r0,%ret0 ; return 0 by default - - ; - ; If 2 or more numbers do the loop - ; - CMPIB,= 1,n,bn_sub_words_single_top - NOP - - ; - ; This loop is unrolled 2 times (64-byte aligned as well) - ; -bn_sub_words_unroll2 - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,0(r_ptr) - - LDD 8(a_ptr),t1 - LDD 8(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - STD sub_tmp1,8(r_ptr) - - LDO -2(n),n - LDO 16(a_ptr),a_ptr - LDO 16(b_ptr),b_ptr - - CMPIB,<= 2,n,bn_sub_words_unroll2 - LDO 16(r_ptr),r_ptr - - CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? - -bn_sub_words_single_top - LDD 0(a_ptr),t1 - LDD 0(b_ptr),t2 - SUB t1,t2,sub_tmp1 ; t3 = t1-t2; - SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; - CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 - LDO 1(%r0),sub_tmp2 - - CMPCLR,*= t1,t2,%r0 - COPY sub_tmp2,%ret0 - - STD sub_tmp1,0(r_ptr) - -bn_sub_words_exit - .EXIT - BVE (%rp) - NOP - .PROCEND ;in=23,24,25,26,29;out=28; - -;------------------------------------------------------------------------------ -; -; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) -; -; arg0 = h -; arg1 = l -; arg2 = d -; -; This is mainly just modified assembly from the compiler, thus the -; lack of variable names. -; -;------------------------------------------------------------------------------ -bn_div_words - .proc - .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .IMPORT BN_num_bits_word,CODE,NO_RELOCATION - .IMPORT __iob,DATA - .IMPORT fprintf,CODE,NO_RELOCATION - .IMPORT abort,CODE,NO_RELOCATION - .IMPORT $$div2U,MILLICODE - .entry - STD %r2,-16(%r30) - STD,MA %r3,352(%r30) - STD %r4,-344(%r30) - STD %r5,-336(%r30) - STD %r6,-328(%r30) - STD %r7,-320(%r30) - STD %r8,-312(%r30) - STD %r9,-304(%r30) - STD %r10,-296(%r30) - - STD %r27,-288(%r30) ; save gp - - COPY %r24,%r3 ; save d - COPY %r26,%r4 ; save h (high 64-bits) - LDO -1(%r0),%ret0 ; return -1 by default - - CMPB,*= %r0,%arg2,$D3 ; if (d == 0) - COPY %r25,%r5 ; save l (low 64-bits) - - LDO -48(%r30),%r29 ; create ap - .CALL ;in=26,29;out=28; - B,L BN_num_bits_word,%r2 - COPY %r3,%r26 - LDD -288(%r30),%r27 ; restore gp - LDI 64,%r21 - - CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) - COPY %ret0,%r24 ; i - MTSARCM %r24 - DEPDI,Z -1,%sar,1,%r29 - CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward) - -$00000012 - SUBI 64,%r24,%r31 ; i = 64 - i; - CMPCLR,*<< %r4,%r3,%r0 ; if (h >= d) - SUB %r4,%r3,%r4 ; h -= d - CMPB,= %r31,%r0,$0000001A ; if (i) - COPY %r0,%r10 ; ret = 0 - MTSARCM %r31 ; i to shift - DEPD,Z %r3,%sar,64,%r3 ; d <<= i; - SUBI 64,%r31,%r19 ; 64 - i; redundent - MTSAR %r19 ; (64 -i) to shift - SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) - MTSARCM %r31 ; i to shift - DEPD,Z %r5,%sar,64,%r5 ; l <<= i; - -$0000001A - DEPDI,Z -1,31,32,%r19 - EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 - EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff - LDO 2(%r0),%r9 - STD %r3,-280(%r30) ; "d" to stack - -$0000001C - DEPDI,Z -1,63,32,%r29 ; - EXTRD,U %r4,31,32,%r31 ; h >> 32 - CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div - COPY %r4,%r26 - EXTRD,U %r4,31,32,%r25 - COPY %r6,%r24 - .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) - B,L $$div2U,%r2 - EXTRD,U %r6,31,32,%r23 - DEPD %r28,31,32,%r29 -$D2 - STD %r29,-272(%r30) ; q - AND %r5,%r19,%r24 ; t & 0xffffffff00000000; - EXTRD,U %r24,31,32,%r24 ; ??? - FLDD -272(%r30),%fr7 ; q - FLDD -280(%r30),%fr8 ; d - XMPYU %fr8L,%fr7L,%fr10 - FSTD %fr10,-256(%r30) - XMPYU %fr8L,%fr7R,%fr22 - FSTD %fr22,-264(%r30) - XMPYU %fr8R,%fr7L,%fr11 - XMPYU %fr8R,%fr7R,%fr23 - FSTD %fr11,-232(%r30) - FSTD %fr23,-240(%r30) - LDD -256(%r30),%r28 - DEPD,Z %r28,31,32,%r2 - LDD -264(%r30),%r20 - ADD,L %r20,%r2,%r31 - LDD -232(%r30),%r22 - DEPD,Z %r22,31,32,%r22 - LDD -240(%r30),%r21 - B $00000024 ; enter loop - ADD,L %r21,%r22,%r23 - -$0000002A - LDO -1(%r29),%r29 - SUB %r23,%r8,%r23 -$00000024 - SUB %r4,%r31,%r25 - AND %r25,%r19,%r26 - CMPB,*<>,N %r0,%r26,$00000046 ; (forward) - DEPD,Z %r25,31,32,%r20 - OR %r20,%r24,%r21 - CMPB,*<<,N %r21,%r23,$0000002A ;(backward) - SUB %r31,%r6,%r31 -;-------------Break path--------------------- - -$00000046 - DEPD,Z %r23,31,32,%r25 ;tl - EXTRD,U %r23,31,32,%r26 ;t - AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L - ADD,L %r31,%r26,%r31 ;th += t; - CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl) - LDO 1(%r31),%r31 ; th++; - CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward) - LDO -1(%r29),%r29 ;q--; - ADD,L %r4,%r3,%r4 ;h += d; -$00000036 - ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward) - SUB %r5,%r24,%r28 ; l -= tl; - SUB %r4,%r31,%r24 ; h -= th; - SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32)); - DEPD,Z %r29,31,32,%r10 ; ret = q<<32 - b $0000001C - DEPD,Z %r28,31,32,%r5 ; l = l << 32 - -$D1 - OR %r10,%r29,%r28 ; ret |= q -$D3 - LDD -368(%r30),%r2 -$D0 - LDD -296(%r30),%r10 - LDD -304(%r30),%r9 - LDD -312(%r30),%r8 - LDD -320(%r30),%r7 - LDD -328(%r30),%r6 - LDD -336(%r30),%r5 - LDD -344(%r30),%r4 - BVE (%r2) - .EXIT - LDD,MB -352(%r30),%r3 - -bn_div_err_case - MFIA %r6 - ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 - LDO R'bn_div_words-bn_div_err_case(%r1),%r6 - ADDIL LT'__iob,%r27,%r1 - LDD RT'__iob(%r1),%r26 - ADDIL L'C$4-bn_div_words,%r6,%r1 - LDO R'C$4-bn_div_words(%r1),%r25 - LDO 64(%r26),%r26 - .CALL ;in=24,25,26,29;out=28; - B,L fprintf,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - .CALL ;in=29; - B,L abort,%r2 - LDO -48(%r30),%r29 - LDD -288(%r30),%r27 - B $D0 - LDD -368(%r30),%r2 - .PROCEND ;in=24,25,26,29;out=28; - -;---------------------------------------------------------------------------- -; -; Registers to hold 64-bit values to manipulate. The "L" part -; of the register corresponds to the upper 32-bits, while the "R" -; part corresponds to the lower 32-bits -; -; Note, that when using b6 and b7, the code must save these before -; using them because they are callee save registers -; -; -; Floating point registers to use to save values that -; are manipulated. These don't collide with ftemp1-6 and -; are all caller save registers -; -a0 .reg %fr22 -a0L .reg %fr22L -a0R .reg %fr22R - -a1 .reg %fr23 -a1L .reg %fr23L -a1R .reg %fr23R - -a2 .reg %fr24 -a2L .reg %fr24L -a2R .reg %fr24R - -a3 .reg %fr25 -a3L .reg %fr25L -a3R .reg %fr25R - -a4 .reg %fr26 -a4L .reg %fr26L -a4R .reg %fr26R - -a5 .reg %fr27 -a5L .reg %fr27L -a5R .reg %fr27R - -a6 .reg %fr28 -a6L .reg %fr28L -a6R .reg %fr28R - -a7 .reg %fr29 -a7L .reg %fr29L -a7R .reg %fr29R - -b0 .reg %fr30 -b0L .reg %fr30L -b0R .reg %fr30R - -b1 .reg %fr31 -b1L .reg %fr31L -b1R .reg %fr31R - -; -; Temporary floating point variables, these are all caller save -; registers -; -ftemp1 .reg %fr4 -ftemp2 .reg %fr5 -ftemp3 .reg %fr6 -ftemp4 .reg %fr7 - -; -; The B set of registers when used. -; - -b2 .reg %fr8 -b2L .reg %fr8L -b2R .reg %fr8R - -b3 .reg %fr9 -b3L .reg %fr9L -b3R .reg %fr9R - -b4 .reg %fr10 -b4L .reg %fr10L -b4R .reg %fr10R - -b5 .reg %fr11 -b5L .reg %fr11L -b5R .reg %fr11R - -b6 .reg %fr12 -b6L .reg %fr12L -b6R .reg %fr12R - -b7 .reg %fr13 -b7L .reg %fr13L -b7R .reg %fr13R - -c1 .reg %r21 ; only reg -temp1 .reg %r20 ; only reg -temp2 .reg %r19 ; only reg -temp3 .reg %r31 ; only reg - -m1 .reg %r28 -c2 .reg %r23 -high_one .reg %r1 -ht .reg %r6 -lt .reg %r5 -m .reg %r4 -c3 .reg %r3 - -SQR_ADD_C .macro A0L,A0R,C1,C2,C3 - XMPYU A0L,A0R,ftemp1 ; m - FSTD ftemp1,-24(%sp) ; store m - - XMPYU A0R,A0R,ftemp2 ; lt - FSTD ftemp2,-16(%sp) ; store lt - - XMPYU A0L,A0L,ftemp3 ; ht - FSTD ftemp3,-8(%sp) ; store ht - - LDD -24(%sp),m ; load m - AND m,high_mask,temp2 ; m & Mask - DEPD,Z m,30,31,temp3 ; m << 32+1 - LDD -16(%sp),lt ; lt - - LDD -8(%sp),ht ; ht - EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 - ADD temp3,lt,lt ; lt = lt+m - ADD,L ht,temp1,ht ; ht += temp1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; ht++ - - ADD C2,ht,C2 ; c2=c2+ht - ADD,DC C3,%r0,C3 ; c3++ -.endm - -SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 - XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,A1L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,A1R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,A1L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD ht,ht,ht ; ht=ht+ht; - ADD,DC C3,%r0,C3 ; add in carry (c3++) - - ADD lt,lt,lt ; lt=lt+lt; - ADD,DC ht,%r0,ht ; add in carry (ht++) - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) - LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - -; -;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba8 - .PROC - .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .ENTRY - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 - SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 - SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 - STD c2,56(r_ptr) ; r[7] = c2; - COPY %r0,c2 - - SQR_ADD_C a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 - SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 - STD c3,64(r_ptr) ; r[8] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 - SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 - SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 - STD c1,72(r_ptr) ; r[9] = c1; - COPY %r0,c1 - - SQR_ADD_C a5L,a5R,c2,c3,c1 - SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 - SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 - STD c2,80(r_ptr) ; r[10] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 - SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 - STD c3,88(r_ptr) ; r[11] = c3; - COPY %r0,c3 - - SQR_ADD_C a6L,a6R,c1,c2,c3 - SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 - STD c1,96(r_ptr) ; r[12] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 - STD c2,104(r_ptr) ; r[13] = c2; - COPY %r0,c2 - - SQR_ADD_C a7L,a7R,c3,c1,c2 - STD c3, 112(r_ptr) ; r[14] = c3 - STD c1, 120(r_ptr) ; r[15] = c1 - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) -; arg0 = r_ptr -; arg1 = a_ptr -; - -bn_sqr_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - SQR_ADD_C a0L,a0R,c1,c2,c3 - - STD c1,0(r_ptr) ; r[0] = c1; - COPY %r0,c1 - - SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 - - STD c2,8(r_ptr) ; r[1] = c2; - COPY %r0,c2 - - SQR_ADD_C a1L,a1R,c3,c1,c2 - SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 - - STD c3,16(r_ptr) ; r[2] = c3; - COPY %r0,c3 - - SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 - SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 - - STD c1,24(r_ptr) ; r[3] = c1; - COPY %r0,c1 - - SQR_ADD_C a2L,a2R,c2,c3,c1 - SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 - - STD c2,32(r_ptr) ; r[4] = c2; - COPY %r0,c2 - - SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 - STD c3,40(r_ptr) ; r[5] = c3; - COPY %r0,c3 - - SQR_ADD_C a3L,a3R,c1,c2,c3 - STD c1,48(r_ptr) ; r[6] = c1; - STD c2,56(r_ptr) ; r[7] = c2; - - .EXIT - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - -;--------------------------------------------------------------------------- - -MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 - XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht - FSTD ftemp1,-16(%sp) ; - XMPYU A0R,B0L,ftemp2 ; m = bh*lt - FSTD ftemp2,-8(%sp) ; - XMPYU A0R,B0R,ftemp3 ; lt = bl*lt - FSTD ftemp3,-32(%sp) - XMPYU A0L,B0L,ftemp4 ; ht = bh*ht - FSTD ftemp4,-24(%sp) ; - - LDD -8(%sp),m ; r21 = m - LDD -16(%sp),m1 ; r19 = m1 - ADD,L m,m1,m ; m+m1 - - DEPD,Z m,31,32,temp3 ; (m+m1<<32) - LDD -24(%sp),ht ; r24 = ht - - CMPCLR,*>>= m,m1,%r0 ; if (m < m1) - ADD,L ht,high_one,ht ; ht+=high_one - - EXTRD,U m,31,32,temp1 ; m >> 32 - LDD -32(%sp),lt ; lt - ADD,L ht,temp1,ht ; ht+= m>>32 - ADD lt,temp3,lt ; lt = lt+m1 - ADD,DC ht,%r0,ht ; ht++ - - ADD C1,lt,C1 ; c1=c1+lt - ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise - - ADD C2,ht,C2 ; c2 = c2 + ht - ADD,DC C3,%r0,C3 ; add in carry (c3++) -.endm - - -; -;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba8 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - FLDD 32(a_ptr),a4 - FLDD 40(a_ptr),a5 - FLDD 48(a_ptr),a6 - FLDD 56(a_ptr),a7 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - FLDD 32(b_ptr),b4 - FLDD 40(b_ptr),b5 - FLDD 48(b_ptr),b6 - FLDD 56(b_ptr),b7 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 - STD c1,48(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 - STD c2,56(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 - STD c3,64(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 - STD c1,72(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 - MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 - MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 - MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 - MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 - STD c2,80(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 - MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 - MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 - MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 - STD c3,88(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 - MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 - MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 - STD c1,96(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 - MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 - STD c2,104(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 - STD c3,112(r_ptr) - STD c1,120(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - -;----------------------------------------------------------------------------- -; -;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) -; arg0 = r_ptr -; arg1 = a_ptr -; arg2 = b_ptr -; - -bn_mul_comba4 - .proc - .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE - .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN - .entry - .align 64 - - STD %r3,0(%sp) ; save r3 - STD %r4,8(%sp) ; save r4 - STD %r5,16(%sp) ; save r5 - STD %r6,24(%sp) ; save r6 - FSTD %fr12,32(%sp) ; save r6 - FSTD %fr13,40(%sp) ; save r7 - - ; - ; Zero out carries - ; - COPY %r0,c1 - COPY %r0,c2 - COPY %r0,c3 - - LDO 128(%sp),%sp ; bump stack - DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 - - ; - ; Load up all of the values we are going to use - ; - FLDD 0(a_ptr),a0 - FLDD 8(a_ptr),a1 - FLDD 16(a_ptr),a2 - FLDD 24(a_ptr),a3 - - FLDD 0(b_ptr),b0 - FLDD 8(b_ptr),b1 - FLDD 16(b_ptr),b2 - FLDD 24(b_ptr),b3 - - MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 - STD c1,0(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 - STD c2,8(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 - MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 - MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 - STD c3,16(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 - MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 - MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 - MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 - STD c1,24(r_ptr) - COPY %r0,c1 - - MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 - MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 - MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 - STD c2,32(r_ptr) - COPY %r0,c2 - - MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 - MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 - STD c3,40(r_ptr) - COPY %r0,c3 - - MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 - STD c1,48(r_ptr) - STD c2,56(r_ptr) - - .EXIT - FLDD -88(%sp),%fr13 - FLDD -96(%sp),%fr12 - LDD -104(%sp),%r6 ; restore r6 - LDD -112(%sp),%r5 ; restore r5 - LDD -120(%sp),%r4 ; restore r4 - BVE (%rp) - LDD,MB -128(%sp),%r3 - - .PROCEND - - - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SPACE $PRIVATE$,SORT=16 - .IMPORT $global$,DATA - .SPACE $TEXT$ - .SUBSPA $CODE$ - .SUBSPA $LIT$,ACCESS=0x2c -C$4 - .ALIGN 8 - .STRINGZ "Division would overflow (%d)\n" - .END diff --git a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl index cd9926a25f..aa9f626ed2 100644 --- a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,7 +21,7 @@ # optimal in respect to instruction set capabilities. Fair comparison # with vendor compiler is problematic, because OpenSSL doesn't define # BN_LLONG [presumably] for historical reasons, which drives compiler -# toward 4 times 16x16=32-bit multiplicatons [plus complementary +# toward 4 times 16x16=32-bit multiplications [plus complementary # shifts and additions] instead. This means that you should observe # several times improvement over code generated by vendor compiler # for PA-RISC 1.1, but the "baseline" is far from optimal. The actual @@ -864,7 +864,7 @@ L\$copy_pa11 comiclr,= 0,$hi1,%r0 copy $ti0,$hi0 addib,<> 4,$idx,L\$copy_pa11 - stws,ma $hi0,4($rp) + stws,ma $hi0,4($rp) nop ; alignment L\$done @@ -984,6 +984,11 @@ sub assemble { ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; } +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler/) { + $gnuas = 1; +} + foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/ge; # flip word order in 64-bit mode... @@ -991,7 +996,10 @@ foreach (split("\n",$code)) { # assemble 2.0 instructions in 32-bit mode... s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); - s/\bbv\b/bve/gm if ($SIZE_T==8); + s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); + s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); + s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); + s/\bbv\b/bve/ if ($SIZE_T==8); print $_,"\n"; } diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl index 9d14a12156..ec7e019a43 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -26,11 +26,21 @@ # So far RSA *sign* performance improvement over pre-bn_mul_mont asm # for 64-bit application running on PPC970/G5 is: # -# 512-bit +65% +# 512-bit +65% # 1024-bit +35% # 2048-bit +18% # 4096-bit +4% +# September 2016 +# +# Add multiplication procedure operating on lengths divisible by 4 +# and squaring procedure operating on lengths divisible by 8. Length +# is expressed in number of limbs. RSA private key operations are +# ~35-50% faster (more for longer keys) on contemporary high-end POWER +# processors in 64-bit builds, [mysteriously enough] more in 32-bit +# builds. On low-end 32-bit processors performance improvement turned +# to be marginal... + $flavour = shift; if ($flavour =~ /32/) { @@ -49,7 +59,8 @@ if ($flavour =~ /32/) { $UMULL= "mullw"; # unsigned multiply low $UMULH= "mulhwu"; # unsigned multiply high $UCMP= "cmplw"; # unsigned compare - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate + $SHLI= "slwi"; # unsigned shift left by immediate $PUSH= $ST; $POP= $LD; } elsif ($flavour =~ /64/) { @@ -69,7 +80,8 @@ if ($flavour =~ /32/) { $UMULL= "mulld"; # unsigned multiply low $UMULH= "mulhdu"; # unsigned multiply high $UCMP= "cmpld"; # unsigned compare - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate + $SHLI= "sldi"; # unsigned shift left by immediate $PUSH= $ST; $POP= $LD; } else { die "nonsense $flavour"; } @@ -86,43 +98,44 @@ open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; $sp="r1"; $toc="r2"; -$rp="r3"; $ovf="r3"; +$rp="r3"; $ap="r4"; $bp="r5"; $np="r6"; $n0="r7"; $num="r8"; -$rp="r9"; # $rp is reassigned -$aj="r10"; -$nj="r11"; -$tj="r12"; + +{ +my $ovf=$rp; +my $rp="r9"; # $rp is reassigned +my $aj="r10"; +my $nj="r11"; +my $tj="r12"; # non-volatile registers -$i="r20"; -$j="r21"; -$tp="r22"; -$m0="r23"; -$m1="r24"; -$lo0="r25"; -$hi0="r26"; -$lo1="r27"; -$hi1="r28"; -$alo="r29"; -$ahi="r30"; -$nlo="r31"; +my $i="r20"; +my $j="r21"; +my $tp="r22"; +my $m0="r23"; +my $m1="r24"; +my $lo0="r25"; +my $hi0="r26"; +my $lo1="r27"; +my $hi1="r28"; +my $alo="r29"; +my $ahi="r30"; +my $nlo="r31"; # -$nhi="r0"; +my $nhi="r0"; $code=<<___; .machine "any" .text .globl .bn_mul_mont_int -.align 4 +.align 5 .bn_mul_mont_int: - cmpwi $num,4 mr $rp,r3 ; $rp is reassigned li r3,0 - bltlr ___ $code.=<<___ if ($BNSZ==4); cmpwi $num,32 ; longer key performance is not better @@ -334,7 +347,1641 @@ Lcopy: ; conditional copy .byte 0,12,4,0,0x80,12,6,0 .long 0 .size .bn_mul_mont_int,.-.bn_mul_mont_int +___ +} +if (1) { +my ($a0,$a1,$a2,$a3, + $t0,$t1,$t2,$t3, + $m0,$m1,$m2,$m3, + $acc0,$acc1,$acc2,$acc3,$acc4, + $bi,$mi,$tp,$ap_end,$cnt) = map("r$_",(9..12,14..31)); +my ($carry,$zero) = ($rp,"r0"); + +# sp----------->+-------------------------------+ +# | saved sp | +# +-------------------------------+ +# . . +# +8*size_t +-------------------------------+ +# | 4 "n0*t0" | +# . . +# . . +# +12*size_t +-------------------------------+ +# | size_t tmp[num] | +# . . +# . . +# . . +# +-------------------------------+ +# | topmost carry | +# . . +# -18*size_t +-------------------------------+ +# | 18 saved gpr, r14-r31 | +# . . +# . . +# +-------------------------------+ +$code.=<<___; +.globl .bn_mul4x_mont_int +.align 5 +.bn_mul4x_mont_int: + andi. r0,$num,7 + bne .Lmul4x_do + $UCMP $ap,$bp + bne .Lmul4x_do + b .Lsqr8x_do +.Lmul4x_do: + slwi $num,$num,`log($SIZE_T)/log(2)` + mr $a0,$sp + li $a1,-32*$SIZE_T + sub $a1,$a1,$num + $STUX $sp,$sp,$a1 # alloca + + $PUSH r14,-$SIZE_T*18($a0) + $PUSH r15,-$SIZE_T*17($a0) + $PUSH r16,-$SIZE_T*16($a0) + $PUSH r17,-$SIZE_T*15($a0) + $PUSH r18,-$SIZE_T*14($a0) + $PUSH r19,-$SIZE_T*13($a0) + $PUSH r20,-$SIZE_T*12($a0) + $PUSH r21,-$SIZE_T*11($a0) + $PUSH r22,-$SIZE_T*10($a0) + $PUSH r23,-$SIZE_T*9($a0) + $PUSH r24,-$SIZE_T*8($a0) + $PUSH r25,-$SIZE_T*7($a0) + $PUSH r26,-$SIZE_T*6($a0) + $PUSH r27,-$SIZE_T*5($a0) + $PUSH r28,-$SIZE_T*4($a0) + $PUSH r29,-$SIZE_T*3($a0) + $PUSH r30,-$SIZE_T*2($a0) + $PUSH r31,-$SIZE_T*1($a0) + + subi $ap,$ap,$SIZE_T # bias by -1 + subi $np,$np,$SIZE_T # bias by -1 + subi $rp,$rp,$SIZE_T # bias by -1 + $LD $n0,0($n0) # *n0 + + add $t0,$bp,$num + add $ap_end,$ap,$num + subi $t0,$t0,$SIZE_T*4 # &b[num-4] + + $LD $bi,$SIZE_T*0($bp) # b[0] + li $acc0,0 + $LD $a0,$SIZE_T*1($ap) # a[0..3] + li $acc1,0 + $LD $a1,$SIZE_T*2($ap) + li $acc2,0 + $LD $a2,$SIZE_T*3($ap) + li $acc3,0 + $LDU $a3,$SIZE_T*4($ap) + $LD $m0,$SIZE_T*1($np) # n[0..3] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + + $PUSH $rp,$SIZE_T*6($sp) # offload rp and &b[num-4] + $PUSH $t0,$SIZE_T*7($sp) + li $carry,0 + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + li $cnt,0 + li $zero,0 + b .Loop_mul4x_1st_reduction + +.align 5 +.Loop_mul4x_1st_reduction: + $UMULL $t0,$a0,$bi # lo(a[0..3]*b[0]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[0..3]*b[0]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULL $mi,$acc0,$n0 # t[0]*n0 + adde $acc3,$acc3,$t3 + $UMULH $t2,$a2,$bi + addze $acc4,$zero + $UMULH $t3,$a3,$bi + $LDX $bi,$bp,$cnt # next b[i] (or b[0]) + addc $acc1,$acc1,$t0 + # (*) mul $t0,$m0,$mi # lo(n[0..3]*t[0]*n0) + $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + # (*) addc $acc0,$acc0,$t0 + # (*) As for removal of first multiplication and addition + # instructions. The outcome of first addition is + # guaranteed to be zero, which leaves two computationally + # significant outcomes: it either carries or not. Then + # question is when does it carry? Is there alternative + # way to deduce it? If you follow operations, you can + # observe that condition for carry is quite simple: + # $acc0 being non-zero. So that carry can be calculated + # by adding -1 to $acc0. That's what next instruction does. + addic $acc0,$acc0,-1 # (*), discarded + $UMULH $t0,$m0,$mi # hi(n[0..3]*t[0]*n0) + adde $acc0,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc1,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc2,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc3,$acc4,$carry + addze $carry,$zero + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + bne .Loop_mul4x_1st_reduction + + $UCMP $ap_end,$ap + beq .Lmul4x4_post_condition + + $LD $a0,$SIZE_T*1($ap) # a[4..7] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + $LD $mi,$SIZE_T*8($sp) # a[0]*n0 + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_1st_tail + +.align 5 +.Loop_mul4x_1st_tail: + $UMULL $t0,$a0,$bi # lo(a[4..7]*b[i]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[4..7]*b[i]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULH $t2,$a2,$bi + adde $acc3,$acc3,$t3 + $UMULH $t3,$a3,$bi + addze $acc4,$zero + $LDX $bi,$bp,$cnt # next b[i] (or b[0]) + addc $acc1,$acc1,$t0 + $UMULL $t0,$m0,$mi # lo(n[4..7]*a[0]*n0) + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + addc $acc0,$acc0,$t0 + $UMULH $t0,$m0,$mi # hi(n[4..7]*a[0]*n0) + adde $acc1,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc2,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc3,$acc3,$t3 + adde $acc4,$acc4,$carry + $UMULH $t3,$m3,$mi + addze $carry,$zero + addi $mi,$sp,$SIZE_T*8 + $LDX $mi,$mi,$cnt # next t[0]*n0 + $STU $acc0,$SIZE_T($tp) # word of result + addc $acc0,$acc1,$t0 + adde $acc1,$acc2,$t1 + adde $acc2,$acc3,$t2 + adde $acc3,$acc4,$t3 + #addze $carry,$carry + bne .Loop_mul4x_1st_tail + + sub $t1,$ap_end,$num # rewinded $ap + $UCMP $ap_end,$ap # done yet? + beq .Lmul4x_proceed + + $LD $a0,$SIZE_T*1($ap) + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + $LD $m0,$SIZE_T*1($np) + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + $LDU $bi,$SIZE_T*4($bp) # *++b + addze $carry,$carry # topmost carry + $LD $a0,$SIZE_T*1($t1) + $LD $a1,$SIZE_T*2($t1) + $LD $a2,$SIZE_T*3($t1) + $LD $a3,$SIZE_T*4($t1) + addi $ap,$t1,$SIZE_T*4 + sub $np,$np,$num # rewind np + + $ST $acc0,$SIZE_T*1($tp) # result + $ST $acc1,$SIZE_T*2($tp) + $ST $acc2,$SIZE_T*3($tp) + $ST $acc3,$SIZE_T*4($tp) + $ST $carry,$SIZE_T*5($tp) # save topmost carry + $LD $acc0,$SIZE_T*12($sp) # t[0..3] + $LD $acc1,$SIZE_T*13($sp) + $LD $acc2,$SIZE_T*14($sp) + $LD $acc3,$SIZE_T*15($sp) + + $LD $m0,$SIZE_T*1($np) # n[0..3] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + li $carry,0 + b .Loop_mul4x_reduction + +.align 5 +.Loop_mul4x_reduction: + $UMULL $t0,$a0,$bi # lo(a[0..3]*b[4]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[0..3]*b[4]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULL $mi,$acc0,$n0 # t[0]*n0 + adde $acc3,$acc3,$t3 + $UMULH $t2,$a2,$bi + addze $acc4,$zero + $UMULH $t3,$a3,$bi + $LDX $bi,$bp,$cnt # next b[i] + addc $acc1,$acc1,$t0 + # (*) mul $t0,$m0,$mi + $STU $mi,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi # lo(n[0..3]*t[0]*n0 + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + # (*) addc $acc0,$acc0,$t0 + addic $acc0,$acc0,-1 # (*), discarded + $UMULH $t0,$m0,$mi # hi(n[0..3]*t[0]*n0 + adde $acc0,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc1,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc2,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc3,$acc4,$carry + addze $carry,$zero + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + bne .Loop_mul4x_reduction + + $LD $t0,$SIZE_T*5($tp) # t[4..7] + addze $carry,$carry + $LD $t1,$SIZE_T*6($tp) + $LD $t2,$SIZE_T*7($tp) + $LD $t3,$SIZE_T*8($tp) + $LD $a0,$SIZE_T*1($ap) # a[4..7] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + + $LD $mi,$SIZE_T*8($sp) # t[0]*n0 + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_tail: + $UMULL $t0,$a0,$bi # lo(a[4..7]*b[4]) + addze $carry,$carry # modulo-scheduled + $UMULL $t1,$a1,$bi + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$bi + andi. $cnt,$cnt,$SIZE_T*4-1 + $UMULL $t3,$a3,$bi + addc $acc0,$acc0,$t0 + $UMULH $t0,$a0,$bi # hi(a[4..7]*b[4]) + adde $acc1,$acc1,$t1 + $UMULH $t1,$a1,$bi + adde $acc2,$acc2,$t2 + $UMULH $t2,$a2,$bi + adde $acc3,$acc3,$t3 + $UMULH $t3,$a3,$bi + addze $acc4,$zero + $LDX $bi,$bp,$cnt # next b[i] + addc $acc1,$acc1,$t0 + $UMULL $t0,$m0,$mi # lo(n[4..7]*t[0]*n0) + adde $acc2,$acc2,$t1 + $UMULL $t1,$m1,$mi + adde $acc3,$acc3,$t2 + $UMULL $t2,$m2,$mi + adde $acc4,$acc4,$t3 # can't overflow + $UMULL $t3,$m3,$mi + addc $acc0,$acc0,$t0 + $UMULH $t0,$m0,$mi # hi(n[4..7]*t[0]*n0) + adde $acc1,$acc1,$t1 + $UMULH $t1,$m1,$mi + adde $acc2,$acc2,$t2 + $UMULH $t2,$m2,$mi + adde $acc3,$acc3,$t3 + $UMULH $t3,$m3,$mi + adde $acc4,$acc4,$carry + addi $mi,$sp,$SIZE_T*8 + $LDX $mi,$mi,$cnt # next a[0]*n0 + addze $carry,$zero + $STU $acc0,$SIZE_T($tp) # word of result + addc $acc0,$acc1,$t0 + adde $acc1,$acc2,$t1 + adde $acc2,$acc3,$t2 + adde $acc3,$acc4,$t3 + #addze $carry,$carry + bne .Loop_mul4x_tail + + $LD $t0,$SIZE_T*5($tp) # next t[i] or topmost carry + sub $t1,$np,$num # rewinded np? + addze $carry,$carry + $UCMP $ap_end,$ap # done yet? + beq .Loop_mul4x_break + + $LD $t1,$SIZE_T*6($tp) + $LD $t2,$SIZE_T*7($tp) + $LD $t3,$SIZE_T*8($tp) + $LD $a0,$SIZE_T*1($ap) + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + addc $acc0,$acc0,$t0 + adde $acc1,$acc1,$t1 + adde $acc2,$acc2,$t2 + adde $acc3,$acc3,$t3 + #addze $carry,$carry + + $LD $m0,$SIZE_T*1($np) # n[4..7] + $LD $m1,$SIZE_T*2($np) + $LD $m2,$SIZE_T*3($np) + $LDU $m3,$SIZE_T*4($np) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_break: + $POP $t2,$SIZE_T*6($sp) # pull rp and &b[num-4] + $POP $t3,$SIZE_T*7($sp) + addc $a0,$acc0,$t0 # accumulate topmost carry + $LD $acc0,$SIZE_T*12($sp) # t[0..3] + addze $a1,$acc1 + $LD $acc1,$SIZE_T*13($sp) + addze $a2,$acc2 + $LD $acc2,$SIZE_T*14($sp) + addze $a3,$acc3 + $LD $acc3,$SIZE_T*15($sp) + addze $carry,$carry # topmost carry + $ST $a0,$SIZE_T*1($tp) # result + sub $ap,$ap_end,$num # rewind ap + $ST $a1,$SIZE_T*2($tp) + $ST $a2,$SIZE_T*3($tp) + $ST $a3,$SIZE_T*4($tp) + $ST $carry,$SIZE_T*5($tp) # store topmost carry + + $LD $m0,$SIZE_T*1($t1) # n[0..3] + $LD $m1,$SIZE_T*2($t1) + $LD $m2,$SIZE_T*3($t1) + $LD $m3,$SIZE_T*4($t1) + addi $np,$t1,$SIZE_T*4 + $UCMP $bp,$t3 # done yet? + beq .Lmul4x_post + + $LDU $bi,$SIZE_T*4($bp) + $LD $a0,$SIZE_T*1($ap) # a[0..3] + $LD $a1,$SIZE_T*2($ap) + $LD $a2,$SIZE_T*3($ap) + $LDU $a3,$SIZE_T*4($ap) + li $carry,0 + addic $tp,$sp,$SIZE_T*7 # &t[-1], clear carry bit + b .Loop_mul4x_reduction + +.align 5 +.Lmul4x_post: + # Final step. We see if result is larger than modulus, and + # if it is, subtract the modulus. But comparison implies + # subtraction. So we subtract modulus, see if it borrowed, + # and conditionally copy original value. + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + mr $bp,$t2 # &rp[-1] + subi $cnt,$cnt,1 + mr $ap_end,$t2 # &rp[-1] copy + subfc $t0,$m0,$acc0 + addi $tp,$sp,$SIZE_T*15 + subfe $t1,$m1,$acc1 + + mtctr $cnt +.Lmul4x_sub: + $LD $m0,$SIZE_T*1($np) + $LD $acc0,$SIZE_T*1($tp) + subfe $t2,$m2,$acc2 + $LD $m1,$SIZE_T*2($np) + $LD $acc1,$SIZE_T*2($tp) + subfe $t3,$m3,$acc3 + $LD $m2,$SIZE_T*3($np) + $LD $acc2,$SIZE_T*3($tp) + $LDU $m3,$SIZE_T*4($np) + $LDU $acc3,$SIZE_T*4($tp) + $ST $t0,$SIZE_T*1($bp) + $ST $t1,$SIZE_T*2($bp) + subfe $t0,$m0,$acc0 + $ST $t2,$SIZE_T*3($bp) + $STU $t3,$SIZE_T*4($bp) + subfe $t1,$m1,$acc1 + bdnz .Lmul4x_sub + + $LD $a0,$SIZE_T*1($ap_end) + $ST $t0,$SIZE_T*1($bp) + $LD $t0,$SIZE_T*12($sp) + subfe $t2,$m2,$acc2 + $LD $a1,$SIZE_T*2($ap_end) + $ST $t1,$SIZE_T*2($bp) + $LD $t1,$SIZE_T*13($sp) + subfe $t3,$m3,$acc3 + subfe $carry,$zero,$carry # did it borrow? + addi $tp,$sp,$SIZE_T*12 + $LD $a2,$SIZE_T*3($ap_end) + $ST $t2,$SIZE_T*3($bp) + $LD $t2,$SIZE_T*14($sp) + $LD $a3,$SIZE_T*4($ap_end) + $ST $t3,$SIZE_T*4($bp) + $LD $t3,$SIZE_T*15($sp) + + mtctr $cnt +.Lmul4x_cond_copy: + and $t0,$t0,$carry + andc $a0,$a0,$carry + $ST $zero,$SIZE_T*0($tp) # wipe stack clean + and $t1,$t1,$carry + andc $a1,$a1,$carry + $ST $zero,$SIZE_T*1($tp) + and $t2,$t2,$carry + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*2($tp) + and $t3,$t3,$carry + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + or $acc0,$t0,$a0 + $LD $a0,$SIZE_T*5($ap_end) + $LD $t0,$SIZE_T*4($tp) + or $acc1,$t1,$a1 + $LD $a1,$SIZE_T*6($ap_end) + $LD $t1,$SIZE_T*5($tp) + or $acc2,$t2,$a2 + $LD $a2,$SIZE_T*7($ap_end) + $LD $t2,$SIZE_T*6($tp) + or $acc3,$t3,$a3 + $LD $a3,$SIZE_T*8($ap_end) + $LD $t3,$SIZE_T*7($tp) + addi $tp,$tp,$SIZE_T*4 + $ST $acc0,$SIZE_T*1($ap_end) + $ST $acc1,$SIZE_T*2($ap_end) + $ST $acc2,$SIZE_T*3($ap_end) + $STU $acc3,$SIZE_T*4($ap_end) + bdnz .Lmul4x_cond_copy + + $POP $bp,0($sp) # pull saved sp + and $t0,$t0,$carry + andc $a0,$a0,$carry + $ST $zero,$SIZE_T*0($tp) + and $t1,$t1,$carry + andc $a1,$a1,$carry + $ST $zero,$SIZE_T*1($tp) + and $t2,$t2,$carry + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*2($tp) + and $t3,$t3,$carry + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + or $acc0,$t0,$a0 + or $acc1,$t1,$a1 + $ST $zero,$SIZE_T*4($tp) + or $acc2,$t2,$a2 + or $acc3,$t3,$a3 + $ST $acc0,$SIZE_T*1($ap_end) + $ST $acc1,$SIZE_T*2($ap_end) + $ST $acc2,$SIZE_T*3($ap_end) + $ST $acc3,$SIZE_T*4($ap_end) + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + $POP $ap,$SIZE_T*6($sp) # pull &rp[-1] + $POP $bp,0($sp) # pull saved sp + addze $carry,$carry # modulo-scheduled + # $acc0-3,$carry hold result, $m0-3 hold modulus + subfc $a0,$m0,$acc0 + subfe $a1,$m1,$acc1 + subfe $a2,$m2,$acc2 + subfe $a3,$m3,$acc3 + subfe $carry,$zero,$carry # did it borrow? + + and $m0,$m0,$carry + and $m1,$m1,$carry + addc $a0,$a0,$m0 + and $m2,$m2,$carry + adde $a1,$a1,$m1 + and $m3,$m3,$carry + adde $a2,$a2,$m2 + adde $a3,$a3,$m3 + + $ST $a0,$SIZE_T*1($ap) # write result + $ST $a1,$SIZE_T*2($ap) + $ST $a2,$SIZE_T*3($ap) + $ST $a3,$SIZE_T*4($ap) + +.Lmul4x_done: + $ST $zero,$SIZE_T*8($sp) # wipe stack clean + $ST $zero,$SIZE_T*9($sp) + $ST $zero,$SIZE_T*10($sp) + $ST $zero,$SIZE_T*11($sp) + li r3,1 # signal "done" + $POP r14,-$SIZE_T*18($bp) + $POP r15,-$SIZE_T*17($bp) + $POP r16,-$SIZE_T*16($bp) + $POP r17,-$SIZE_T*15($bp) + $POP r18,-$SIZE_T*14($bp) + $POP r19,-$SIZE_T*13($bp) + $POP r20,-$SIZE_T*12($bp) + $POP r21,-$SIZE_T*11($bp) + $POP r22,-$SIZE_T*10($bp) + $POP r23,-$SIZE_T*9($bp) + $POP r24,-$SIZE_T*8($bp) + $POP r25,-$SIZE_T*7($bp) + $POP r26,-$SIZE_T*6($bp) + $POP r27,-$SIZE_T*5($bp) + $POP r28,-$SIZE_T*4($bp) + $POP r29,-$SIZE_T*3($bp) + $POP r30,-$SIZE_T*2($bp) + $POP r31,-$SIZE_T*1($bp) + mr $sp,$bp + blr + .long 0 + .byte 0,12,4,0x20,0x80,18,6,0 + .long 0 +.size .bn_mul4x_mont_int,.-.bn_mul4x_mont_int +___ +} + +if (1) { +######################################################################## +# Following is PPC adaptation of sqrx8x_mont from x86_64-mont5 module. + +my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("r$_",(9..12,14..17)); +my ($t0,$t1,$t2,$t3)=map("r$_",(18..21)); +my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("r$_",(22..29)); +my ($cnt,$carry,$zero)=("r30","r31","r0"); +my ($tp,$ap_end,$na0)=($bp,$np,$carry); + +# sp----------->+-------------------------------+ +# | saved sp | +# +-------------------------------+ +# . . +# +12*size_t +-------------------------------+ +# | size_t tmp[2*num] | +# . . +# . . +# . . +# +-------------------------------+ +# . . +# -18*size_t +-------------------------------+ +# | 18 saved gpr, r14-r31 | +# . . +# . . +# +-------------------------------+ +$code.=<<___; +.align 5 +__bn_sqr8x_mont: +.Lsqr8x_do: + mr $a0,$sp + slwi $a1,$num,`log($SIZE_T)/log(2)+1` + li $a2,-32*$SIZE_T + sub $a1,$a2,$a1 + slwi $num,$num,`log($SIZE_T)/log(2)` + $STUX $sp,$sp,$a1 # alloca + + $PUSH r14,-$SIZE_T*18($a0) + $PUSH r15,-$SIZE_T*17($a0) + $PUSH r16,-$SIZE_T*16($a0) + $PUSH r17,-$SIZE_T*15($a0) + $PUSH r18,-$SIZE_T*14($a0) + $PUSH r19,-$SIZE_T*13($a0) + $PUSH r20,-$SIZE_T*12($a0) + $PUSH r21,-$SIZE_T*11($a0) + $PUSH r22,-$SIZE_T*10($a0) + $PUSH r23,-$SIZE_T*9($a0) + $PUSH r24,-$SIZE_T*8($a0) + $PUSH r25,-$SIZE_T*7($a0) + $PUSH r26,-$SIZE_T*6($a0) + $PUSH r27,-$SIZE_T*5($a0) + $PUSH r28,-$SIZE_T*4($a0) + $PUSH r29,-$SIZE_T*3($a0) + $PUSH r30,-$SIZE_T*2($a0) + $PUSH r31,-$SIZE_T*1($a0) + subi $ap,$ap,$SIZE_T # bias by -1 + subi $t0,$np,$SIZE_T # bias by -1 + subi $rp,$rp,$SIZE_T # bias by -1 + $LD $n0,0($n0) # *n0 + li $zero,0 + + add $ap_end,$ap,$num + $LD $a0,$SIZE_T*1($ap) + #li $acc0,0 + $LD $a1,$SIZE_T*2($ap) + li $acc1,0 + $LD $a2,$SIZE_T*3($ap) + li $acc2,0 + $LD $a3,$SIZE_T*4($ap) + li $acc3,0 + $LD $a4,$SIZE_T*5($ap) + li $acc4,0 + $LD $a5,$SIZE_T*6($ap) + li $acc5,0 + $LD $a6,$SIZE_T*7($ap) + li $acc6,0 + $LDU $a7,$SIZE_T*8($ap) + li $acc7,0 + + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + subic. $cnt,$num,$SIZE_T*8 + b .Lsqr8x_zero_start + +.align 5 +.Lsqr8x_zero: + subic. $cnt,$cnt,$SIZE_T*8 + $ST $zero,$SIZE_T*1($tp) + $ST $zero,$SIZE_T*2($tp) + $ST $zero,$SIZE_T*3($tp) + $ST $zero,$SIZE_T*4($tp) + $ST $zero,$SIZE_T*5($tp) + $ST $zero,$SIZE_T*6($tp) + $ST $zero,$SIZE_T*7($tp) + $ST $zero,$SIZE_T*8($tp) +.Lsqr8x_zero_start: + $ST $zero,$SIZE_T*9($tp) + $ST $zero,$SIZE_T*10($tp) + $ST $zero,$SIZE_T*11($tp) + $ST $zero,$SIZE_T*12($tp) + $ST $zero,$SIZE_T*13($tp) + $ST $zero,$SIZE_T*14($tp) + $ST $zero,$SIZE_T*15($tp) + $STU $zero,$SIZE_T*16($tp) + bne .Lsqr8x_zero + + $PUSH $rp,$SIZE_T*6($sp) # offload &rp[-1] + $PUSH $t0,$SIZE_T*7($sp) # offload &np[-1] + $PUSH $n0,$SIZE_T*8($sp) # offload n0 + $PUSH $tp,$SIZE_T*9($sp) # &tp[2*num-1] + $PUSH $zero,$SIZE_T*10($sp) # initial top-most carry + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + + # Multiply everything but a[i]*a[i] +.align 5 +.Lsqr8x_outer_loop: + # a[1]a[0] (i) + # a[2]a[0] + # a[3]a[0] + # a[4]a[0] + # a[5]a[0] + # a[6]a[0] + # a[7]a[0] + # a[2]a[1] (ii) + # a[3]a[1] + # a[4]a[1] + # a[5]a[1] + # a[6]a[1] + # a[7]a[1] + # a[3]a[2] (iii) + # a[4]a[2] + # a[5]a[2] + # a[6]a[2] + # a[7]a[2] + # a[4]a[3] (iv) + # a[5]a[3] + # a[6]a[3] + # a[7]a[3] + # a[5]a[4] (v) + # a[6]a[4] + # a[7]a[4] + # a[6]a[5] (vi) + # a[7]a[5] + # a[7]a[6] (vii) + + $UMULL $t0,$a1,$a0 # lo(a[1..7]*a[0]) (i) + $UMULL $t1,$a2,$a0 + $UMULL $t2,$a3,$a0 + $UMULL $t3,$a4,$a0 + addc $acc1,$acc1,$t0 # t[1]+lo(a[1]*a[0]) + $UMULL $t0,$a5,$a0 + adde $acc2,$acc2,$t1 + $UMULL $t1,$a6,$a0 + adde $acc3,$acc3,$t2 + $UMULL $t2,$a7,$a0 + adde $acc4,$acc4,$t3 + $UMULH $t3,$a1,$a0 # hi(a[1..7]*a[0]) + adde $acc5,$acc5,$t0 + $UMULH $t0,$a2,$a0 + adde $acc6,$acc6,$t1 + $UMULH $t1,$a3,$a0 + adde $acc7,$acc7,$t2 + $UMULH $t2,$a4,$a0 + $ST $acc0,$SIZE_T*1($tp) # t[0] + addze $acc0,$zero # t[8] + $ST $acc1,$SIZE_T*2($tp) # t[1] + addc $acc2,$acc2,$t3 # t[2]+lo(a[1]*a[0]) + $UMULH $t3,$a5,$a0 + adde $acc3,$acc3,$t0 + $UMULH $t0,$a6,$a0 + adde $acc4,$acc4,$t1 + $UMULH $t1,$a7,$a0 + adde $acc5,$acc5,$t2 + $UMULL $t2,$a2,$a1 # lo(a[2..7]*a[1]) (ii) + adde $acc6,$acc6,$t3 + $UMULL $t3,$a3,$a1 + adde $acc7,$acc7,$t0 + $UMULL $t0,$a4,$a1 + adde $acc0,$acc0,$t1 + + $UMULL $t1,$a5,$a1 + addc $acc3,$acc3,$t2 + $UMULL $t2,$a6,$a1 + adde $acc4,$acc4,$t3 + $UMULL $t3,$a7,$a1 + adde $acc5,$acc5,$t0 + $UMULH $t0,$a2,$a1 # hi(a[2..7]*a[1]) + adde $acc6,$acc6,$t1 + $UMULH $t1,$a3,$a1 + adde $acc7,$acc7,$t2 + $UMULH $t2,$a4,$a1 + adde $acc0,$acc0,$t3 + $UMULH $t3,$a5,$a1 + $ST $acc2,$SIZE_T*3($tp) # t[2] + addze $acc1,$zero # t[9] + $ST $acc3,$SIZE_T*4($tp) # t[3] + addc $acc4,$acc4,$t0 + $UMULH $t0,$a6,$a1 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a7,$a1 + adde $acc6,$acc6,$t2 + $UMULL $t2,$a3,$a2 # lo(a[3..7]*a[2]) (iii) + adde $acc7,$acc7,$t3 + $UMULL $t3,$a4,$a2 + adde $acc0,$acc0,$t0 + $UMULL $t0,$a5,$a2 + adde $acc1,$acc1,$t1 + + $UMULL $t1,$a6,$a2 + addc $acc5,$acc5,$t2 + $UMULL $t2,$a7,$a2 + adde $acc6,$acc6,$t3 + $UMULH $t3,$a3,$a2 # hi(a[3..7]*a[2]) + adde $acc7,$acc7,$t0 + $UMULH $t0,$a4,$a2 + adde $acc0,$acc0,$t1 + $UMULH $t1,$a5,$a2 + adde $acc1,$acc1,$t2 + $UMULH $t2,$a6,$a2 + $ST $acc4,$SIZE_T*5($tp) # t[4] + addze $acc2,$zero # t[10] + $ST $acc5,$SIZE_T*6($tp) # t[5] + addc $acc6,$acc6,$t3 + $UMULH $t3,$a7,$a2 + adde $acc7,$acc7,$t0 + $UMULL $t0,$a4,$a3 # lo(a[4..7]*a[3]) (iv) + adde $acc0,$acc0,$t1 + $UMULL $t1,$a5,$a3 + adde $acc1,$acc1,$t2 + $UMULL $t2,$a6,$a3 + adde $acc2,$acc2,$t3 + + $UMULL $t3,$a7,$a3 + addc $acc7,$acc7,$t0 + $UMULH $t0,$a4,$a3 # hi(a[4..7]*a[3]) + adde $acc0,$acc0,$t1 + $UMULH $t1,$a5,$a3 + adde $acc1,$acc1,$t2 + $UMULH $t2,$a6,$a3 + adde $acc2,$acc2,$t3 + $UMULH $t3,$a7,$a3 + $ST $acc6,$SIZE_T*7($tp) # t[6] + addze $acc3,$zero # t[11] + $STU $acc7,$SIZE_T*8($tp) # t[7] + addc $acc0,$acc0,$t0 + $UMULL $t0,$a5,$a4 # lo(a[5..7]*a[4]) (v) + adde $acc1,$acc1,$t1 + $UMULL $t1,$a6,$a4 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a7,$a4 + adde $acc3,$acc3,$t3 + + $UMULH $t3,$a5,$a4 # hi(a[5..7]*a[4]) + addc $acc1,$acc1,$t0 + $UMULH $t0,$a6,$a4 + adde $acc2,$acc2,$t1 + $UMULH $t1,$a7,$a4 + adde $acc3,$acc3,$t2 + $UMULL $t2,$a6,$a5 # lo(a[6..7]*a[5]) (vi) + addze $acc4,$zero # t[12] + addc $acc2,$acc2,$t3 + $UMULL $t3,$a7,$a5 + adde $acc3,$acc3,$t0 + $UMULH $t0,$a6,$a5 # hi(a[6..7]*a[5]) + adde $acc4,$acc4,$t1 + + $UMULH $t1,$a7,$a5 + addc $acc3,$acc3,$t2 + $UMULL $t2,$a7,$a6 # lo(a[7]*a[6]) (vii) + adde $acc4,$acc4,$t3 + $UMULH $t3,$a7,$a6 # hi(a[7]*a[6]) + addze $acc5,$zero # t[13] + addc $acc4,$acc4,$t0 + $UCMP $ap_end,$ap # done yet? + adde $acc5,$acc5,$t1 + + addc $acc5,$acc5,$t2 + sub $t0,$ap_end,$num # rewinded ap + addze $acc6,$zero # t[14] + add $acc6,$acc6,$t3 + + beq .Lsqr8x_outer_break + + mr $n0,$a0 + $LD $a0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($tp) + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($ap) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($ap) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($ap) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($ap) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($ap) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($ap) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($ap) + subi $rp,$ap,$SIZE_T*7 + addze $acc7,$a7 + $LDU $a7,$SIZE_T*8($ap) + #addze $carry,$zero # moved below + li $cnt,0 + b .Lsqr8x_mul + + # a[8]a[0] + # a[9]a[0] + # a[a]a[0] + # a[b]a[0] + # a[c]a[0] + # a[d]a[0] + # a[e]a[0] + # a[f]a[0] + # a[8]a[1] + # a[f]a[1]........................ + # a[8]a[2] + # a[f]a[2]........................ + # a[8]a[3] + # a[f]a[3]........................ + # a[8]a[4] + # a[f]a[4]........................ + # a[8]a[5] + # a[f]a[5]........................ + # a[8]a[6] + # a[f]a[6]........................ + # a[8]a[7] + # a[f]a[7]........................ +.align 5 +.Lsqr8x_mul: + $UMULL $t0,$a0,$n0 + addze $carry,$zero # carry bit, modulo-scheduled + $UMULL $t1,$a1,$n0 + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$n0 + andi. $cnt,$cnt,$SIZE_T*8-1 + $UMULL $t3,$a3,$n0 + addc $acc0,$acc0,$t0 + $UMULL $t0,$a4,$n0 + adde $acc1,$acc1,$t1 + $UMULL $t1,$a5,$n0 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a6,$n0 + adde $acc3,$acc3,$t3 + $UMULL $t3,$a7,$n0 + adde $acc4,$acc4,$t0 + $UMULH $t0,$a0,$n0 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a1,$n0 + adde $acc6,$acc6,$t2 + $UMULH $t2,$a2,$n0 + adde $acc7,$acc7,$t3 + $UMULH $t3,$a3,$n0 + addze $carry,$carry + $STU $acc0,$SIZE_T($tp) + addc $acc0,$acc1,$t0 + $UMULH $t0,$a4,$n0 + adde $acc1,$acc2,$t1 + $UMULH $t1,$a5,$n0 + adde $acc2,$acc3,$t2 + $UMULH $t2,$a6,$n0 + adde $acc3,$acc4,$t3 + $UMULH $t3,$a7,$n0 + $LDX $n0,$rp,$cnt + adde $acc4,$acc5,$t0 + adde $acc5,$acc6,$t1 + adde $acc6,$acc7,$t2 + adde $acc7,$carry,$t3 + #addze $carry,$zero # moved above + bne .Lsqr8x_mul + # note that carry flag is guaranteed + # to be zero at this point + $UCMP $ap,$ap_end # done yet? + beq .Lsqr8x_break + + $LD $a0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($tp) + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($ap) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($ap) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($ap) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($ap) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($ap) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($ap) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($ap) + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*8($ap) + #addze $carry,$zero # moved above + b .Lsqr8x_mul + +.align 5 +.Lsqr8x_break: + $LD $a0,$SIZE_T*8($rp) + addi $ap,$rp,$SIZE_T*15 + $LD $a1,$SIZE_T*9($rp) + sub. $t0,$ap_end,$ap # is it last iteration? + $LD $a2,$SIZE_T*10($rp) + sub $t1,$tp,$t0 + $LD $a3,$SIZE_T*11($rp) + $LD $a4,$SIZE_T*12($rp) + $LD $a5,$SIZE_T*13($rp) + $LD $a6,$SIZE_T*14($rp) + $LD $a7,$SIZE_T*15($rp) + beq .Lsqr8x_outer_loop + + $ST $acc0,$SIZE_T*1($tp) + $LD $acc0,$SIZE_T*1($t1) + $ST $acc1,$SIZE_T*2($tp) + $LD $acc1,$SIZE_T*2($t1) + $ST $acc2,$SIZE_T*3($tp) + $LD $acc2,$SIZE_T*3($t1) + $ST $acc3,$SIZE_T*4($tp) + $LD $acc3,$SIZE_T*4($t1) + $ST $acc4,$SIZE_T*5($tp) + $LD $acc4,$SIZE_T*5($t1) + $ST $acc5,$SIZE_T*6($tp) + $LD $acc5,$SIZE_T*6($t1) + $ST $acc6,$SIZE_T*7($tp) + $LD $acc6,$SIZE_T*7($t1) + $ST $acc7,$SIZE_T*8($tp) + $LD $acc7,$SIZE_T*8($t1) + mr $tp,$t1 + b .Lsqr8x_outer_loop + +.align 5 +.Lsqr8x_outer_break: + #################################################################### + # Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + $LD $a1,$SIZE_T*1($t0) # recall that $t0 is &a[-1] + $LD $a3,$SIZE_T*2($t0) + $LD $a5,$SIZE_T*3($t0) + $LD $a7,$SIZE_T*4($t0) + addi $ap,$t0,$SIZE_T*4 + # "tp[x]" comments are for num==8 case + $LD $t1,$SIZE_T*13($sp) # =tp[1], t[0] is not interesting + $LD $t2,$SIZE_T*14($sp) + $LD $t3,$SIZE_T*15($sp) + $LD $t0,$SIZE_T*16($sp) + + $ST $acc0,$SIZE_T*1($tp) # tp[8]= + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + $ST $acc1,$SIZE_T*2($tp) + subi $cnt,$cnt,1 + $ST $acc2,$SIZE_T*3($tp) + $ST $acc3,$SIZE_T*4($tp) + $ST $acc4,$SIZE_T*5($tp) + $ST $acc5,$SIZE_T*6($tp) + $ST $acc6,$SIZE_T*7($tp) + #$ST $acc7,$SIZE_T*8($tp) # tp[15] is not interesting + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + $UMULL $acc0,$a1,$a1 + $UMULH $a1,$a1,$a1 + add $acc1,$t1,$t1 # <<1 + $SHRI $t1,$t1,$BITS-1 + $UMULL $a2,$a3,$a3 + $UMULH $a3,$a3,$a3 + addc $acc1,$acc1,$a1 + add $acc2,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + add $acc3,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc2,$acc2,$t1 + + mtctr $cnt +.Lsqr4x_shift_n_add: + $UMULL $a4,$a5,$a5 + $UMULH $a5,$a5,$a5 + $LD $t1,$SIZE_T*6($tp) # =tp[5] + $LD $a1,$SIZE_T*1($ap) + adde $acc2,$acc2,$a2 + add $acc4,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc3,$acc3,$t2 + $LD $t2,$SIZE_T*7($tp) # =tp[6] + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*2($ap) + add $acc5,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc4,$acc4,$t3 + $LD $t3,$SIZE_T*8($tp) # =tp[7] + $UMULL $a6,$a7,$a7 + $UMULH $a7,$a7,$a7 + adde $acc4,$acc4,$a4 + add $acc6,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc5,$acc5,$t0 + $LD $t0,$SIZE_T*9($tp) # =tp[8] + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*3($ap) + add $acc7,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc6,$acc6,$t1 + $LD $t1,$SIZE_T*10($tp) # =tp[9] + $UMULL $a0,$a1,$a1 + $UMULH $a1,$a1,$a1 + adde $acc6,$acc6,$a6 + $ST $acc0,$SIZE_T*1($tp) # tp[0]= + add $acc0,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc7,$acc7,$t2 + $LD $t2,$SIZE_T*11($tp) # =tp[10] + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*4($ap) + $ST $acc1,$SIZE_T*2($tp) # tp[1]= + add $acc1,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc0,$acc0,$t3 + $LD $t3,$SIZE_T*12($tp) # =tp[11] + $UMULL $a2,$a3,$a3 + $UMULH $a3,$a3,$a3 + adde $acc0,$acc0,$a0 + $ST $acc2,$SIZE_T*3($tp) # tp[2]= + add $acc2,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc1,$acc1,$t0 + $LD $t0,$SIZE_T*13($tp) # =tp[12] + adde $acc1,$acc1,$a1 + $ST $acc3,$SIZE_T*4($tp) # tp[3]= + $ST $acc4,$SIZE_T*5($tp) # tp[4]= + $ST $acc5,$SIZE_T*6($tp) # tp[5]= + $ST $acc6,$SIZE_T*7($tp) # tp[6]= + $STU $acc7,$SIZE_T*8($tp) # tp[7]= + add $acc3,$t3,$t3 + $SHRI $t3,$t3,$BITS-1 + or $acc2,$acc2,$t1 + bdnz .Lsqr4x_shift_n_add +___ +my ($np,$np_end)=($ap,$ap_end); +$code.=<<___; + $POP $np,$SIZE_T*7($sp) # pull &np[-1] and n0 + $POP $n0,$SIZE_T*8($sp) + + $UMULL $a4,$a5,$a5 + $UMULH $a5,$a5,$a5 + $ST $acc0,$SIZE_T*1($tp) # tp[8]= + $LD $acc0,$SIZE_T*12($sp) # =tp[0] + $LD $t1,$SIZE_T*6($tp) # =tp[13] + adde $acc2,$acc2,$a2 + add $acc4,$t0,$t0 + $SHRI $t0,$t0,$BITS-1 + or $acc3,$acc3,$t2 + $LD $t2,$SIZE_T*7($tp) # =tp[14] + adde $acc3,$acc3,$a3 + add $acc5,$t1,$t1 + $SHRI $t1,$t1,$BITS-1 + or $acc4,$acc4,$t3 + $UMULL $a6,$a7,$a7 + $UMULH $a7,$a7,$a7 + adde $acc4,$acc4,$a4 + add $acc6,$t2,$t2 + $SHRI $t2,$t2,$BITS-1 + or $acc5,$acc5,$t0 + $ST $acc1,$SIZE_T*2($tp) # tp[9]= + $LD $acc1,$SIZE_T*13($sp) # =tp[1] + adde $acc5,$acc5,$a5 + or $acc6,$acc6,$t1 + $LD $a0,$SIZE_T*1($np) + $LD $a1,$SIZE_T*2($np) + adde $acc6,$acc6,$a6 + $LD $a2,$SIZE_T*3($np) + $LD $a3,$SIZE_T*4($np) + adde $acc7,$a7,$t2 + $LD $a4,$SIZE_T*5($np) + $LD $a5,$SIZE_T*6($np) + + ################################################################ + # Reduce by 8 limbs per iteration + $UMULL $na0,$n0,$acc0 # t[0]*n0 + li $cnt,8 + $LD $a6,$SIZE_T*7($np) + add $np_end,$np,$num + $LDU $a7,$SIZE_T*8($np) + $ST $acc2,$SIZE_T*3($tp) # tp[10]= + $LD $acc2,$SIZE_T*14($sp) + $ST $acc3,$SIZE_T*4($tp) # tp[11]= + $LD $acc3,$SIZE_T*15($sp) + $ST $acc4,$SIZE_T*5($tp) # tp[12]= + $LD $acc4,$SIZE_T*16($sp) + $ST $acc5,$SIZE_T*6($tp) # tp[13]= + $LD $acc5,$SIZE_T*17($sp) + $ST $acc6,$SIZE_T*7($tp) # tp[14]= + $LD $acc6,$SIZE_T*18($sp) + $ST $acc7,$SIZE_T*8($tp) # tp[15]= + $LD $acc7,$SIZE_T*19($sp) + addi $tp,$sp,$SIZE_T*11 # &tp[-1] + mtctr $cnt + b .Lsqr8x_reduction + +.align 5 +.Lsqr8x_reduction: + # (*) $UMULL $t0,$a0,$na0 # lo(n[0-7])*lo(t[0]*n0) + $UMULL $t1,$a1,$na0 + $UMULL $t2,$a2,$na0 + $STU $na0,$SIZE_T($tp) # put aside t[0]*n0 for tail processing + $UMULL $t3,$a3,$na0 + # (*) addc $acc0,$acc0,$t0 + addic $acc0,$acc0,-1 # (*) + $UMULL $t0,$a4,$na0 + adde $acc0,$acc1,$t1 + $UMULL $t1,$a5,$na0 + adde $acc1,$acc2,$t2 + $UMULL $t2,$a6,$na0 + adde $acc2,$acc3,$t3 + $UMULL $t3,$a7,$na0 + adde $acc3,$acc4,$t0 + $UMULH $t0,$a0,$na0 # hi(n[0-7])*lo(t[0]*n0) + adde $acc4,$acc5,$t1 + $UMULH $t1,$a1,$na0 + adde $acc5,$acc6,$t2 + $UMULH $t2,$a2,$na0 + adde $acc6,$acc7,$t3 + $UMULH $t3,$a3,$na0 + addze $acc7,$zero + addc $acc0,$acc0,$t0 + $UMULH $t0,$a4,$na0 + adde $acc1,$acc1,$t1 + $UMULH $t1,$a5,$na0 + adde $acc2,$acc2,$t2 + $UMULH $t2,$a6,$na0 + adde $acc3,$acc3,$t3 + $UMULH $t3,$a7,$na0 + $UMULL $na0,$n0,$acc0 # next t[0]*n0 + adde $acc4,$acc4,$t0 + adde $acc5,$acc5,$t1 + adde $acc6,$acc6,$t2 + adde $acc7,$acc7,$t3 + bdnz .Lsqr8x_reduction + + $LD $t0,$SIZE_T*1($tp) + $LD $t1,$SIZE_T*2($tp) + $LD $t2,$SIZE_T*3($tp) + $LD $t3,$SIZE_T*4($tp) + subi $rp,$tp,$SIZE_T*7 + $UCMP $np_end,$np # done yet? + addc $acc0,$acc0,$t0 + $LD $t0,$SIZE_T*5($tp) + adde $acc1,$acc1,$t1 + $LD $t1,$SIZE_T*6($tp) + adde $acc2,$acc2,$t2 + $LD $t2,$SIZE_T*7($tp) + adde $acc3,$acc3,$t3 + $LD $t3,$SIZE_T*8($tp) + adde $acc4,$acc4,$t0 + adde $acc5,$acc5,$t1 + adde $acc6,$acc6,$t2 + adde $acc7,$acc7,$t3 + #addze $carry,$zero # moved below + beq .Lsqr8x8_post_condition + + $LD $n0,$SIZE_T*0($rp) + $LD $a0,$SIZE_T*1($np) + $LD $a1,$SIZE_T*2($np) + $LD $a2,$SIZE_T*3($np) + $LD $a3,$SIZE_T*4($np) + $LD $a4,$SIZE_T*5($np) + $LD $a5,$SIZE_T*6($np) + $LD $a6,$SIZE_T*7($np) + $LDU $a7,$SIZE_T*8($np) + li $cnt,0 + +.align 5 +.Lsqr8x_tail: + $UMULL $t0,$a0,$n0 + addze $carry,$zero # carry bit, modulo-scheduled + $UMULL $t1,$a1,$n0 + addi $cnt,$cnt,$SIZE_T + $UMULL $t2,$a2,$n0 + andi. $cnt,$cnt,$SIZE_T*8-1 + $UMULL $t3,$a3,$n0 + addc $acc0,$acc0,$t0 + $UMULL $t0,$a4,$n0 + adde $acc1,$acc1,$t1 + $UMULL $t1,$a5,$n0 + adde $acc2,$acc2,$t2 + $UMULL $t2,$a6,$n0 + adde $acc3,$acc3,$t3 + $UMULL $t3,$a7,$n0 + adde $acc4,$acc4,$t0 + $UMULH $t0,$a0,$n0 + adde $acc5,$acc5,$t1 + $UMULH $t1,$a1,$n0 + adde $acc6,$acc6,$t2 + $UMULH $t2,$a2,$n0 + adde $acc7,$acc7,$t3 + $UMULH $t3,$a3,$n0 + addze $carry,$carry + $STU $acc0,$SIZE_T($tp) + addc $acc0,$acc1,$t0 + $UMULH $t0,$a4,$n0 + adde $acc1,$acc2,$t1 + $UMULH $t1,$a5,$n0 + adde $acc2,$acc3,$t2 + $UMULH $t2,$a6,$n0 + adde $acc3,$acc4,$t3 + $UMULH $t3,$a7,$n0 + $LDX $n0,$rp,$cnt + adde $acc4,$acc5,$t0 + adde $acc5,$acc6,$t1 + adde $acc6,$acc7,$t2 + adde $acc7,$carry,$t3 + #addze $carry,$zero # moved above + bne .Lsqr8x_tail + # note that carry flag is guaranteed + # to be zero at this point + $LD $a0,$SIZE_T*1($tp) + $POP $carry,$SIZE_T*10($sp) # pull top-most carry in case we break + $UCMP $np_end,$np # done yet? + $LD $a1,$SIZE_T*2($tp) + sub $t2,$np_end,$num # rewinded np + $LD $a2,$SIZE_T*3($tp) + $LD $a3,$SIZE_T*4($tp) + $LD $a4,$SIZE_T*5($tp) + $LD $a5,$SIZE_T*6($tp) + $LD $a6,$SIZE_T*7($tp) + $LD $a7,$SIZE_T*8($tp) + beq .Lsqr8x_tail_break + + addc $acc0,$acc0,$a0 + $LD $a0,$SIZE_T*1($np) + adde $acc1,$acc1,$a1 + $LD $a1,$SIZE_T*2($np) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($np) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($np) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($np) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($np) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($np) + adde $acc7,$acc7,$a7 + $LDU $a7,$SIZE_T*8($np) + #addze $carry,$zero # moved above + b .Lsqr8x_tail + +.align 5 +.Lsqr8x_tail_break: + $POP $n0,$SIZE_T*8($sp) # pull n0 + $POP $t3,$SIZE_T*9($sp) # &tp[2*num-1] + addi $cnt,$tp,$SIZE_T*8 # end of current t[num] window + + addic $carry,$carry,-1 # "move" top-most carry to carry bit + adde $t0,$acc0,$a0 + $LD $acc0,$SIZE_T*8($rp) + $LD $a0,$SIZE_T*1($t2) # recall that $t2 is &n[-1] + adde $t1,$acc1,$a1 + $LD $acc1,$SIZE_T*9($rp) + $LD $a1,$SIZE_T*2($t2) + adde $acc2,$acc2,$a2 + $LD $a2,$SIZE_T*3($t2) + adde $acc3,$acc3,$a3 + $LD $a3,$SIZE_T*4($t2) + adde $acc4,$acc4,$a4 + $LD $a4,$SIZE_T*5($t2) + adde $acc5,$acc5,$a5 + $LD $a5,$SIZE_T*6($t2) + adde $acc6,$acc6,$a6 + $LD $a6,$SIZE_T*7($t2) + adde $acc7,$acc7,$a7 + $LD $a7,$SIZE_T*8($t2) + addi $np,$t2,$SIZE_T*8 + addze $t2,$zero # top-most carry + $UMULL $na0,$n0,$acc0 + $ST $t0,$SIZE_T*1($tp) + $UCMP $cnt,$t3 # did we hit the bottom? + $ST $t1,$SIZE_T*2($tp) + li $cnt,8 + $ST $acc2,$SIZE_T*3($tp) + $LD $acc2,$SIZE_T*10($rp) + $ST $acc3,$SIZE_T*4($tp) + $LD $acc3,$SIZE_T*11($rp) + $ST $acc4,$SIZE_T*5($tp) + $LD $acc4,$SIZE_T*12($rp) + $ST $acc5,$SIZE_T*6($tp) + $LD $acc5,$SIZE_T*13($rp) + $ST $acc6,$SIZE_T*7($tp) + $LD $acc6,$SIZE_T*14($rp) + $ST $acc7,$SIZE_T*8($tp) + $LD $acc7,$SIZE_T*15($rp) + $PUSH $t2,$SIZE_T*10($sp) # off-load top-most carry + addi $tp,$rp,$SIZE_T*7 # slide the window + mtctr $cnt + bne .Lsqr8x_reduction + + ################################################################ + # Final step. We see if result is larger than modulus, and + # if it is, subtract the modulus. But comparison implies + # subtraction. So we subtract modulus, see if it borrowed, + # and conditionally copy original value. + $POP $rp,$SIZE_T*6($sp) # pull &rp[-1] + srwi $cnt,$num,`log($SIZE_T)/log(2)+3` + mr $n0,$tp # put tp aside + addi $tp,$tp,$SIZE_T*8 + subi $cnt,$cnt,1 + subfc $t0,$a0,$acc0 + subfe $t1,$a1,$acc1 + mr $carry,$t2 + mr $ap_end,$rp # $rp copy + + mtctr $cnt + b .Lsqr8x_sub + +.align 5 +.Lsqr8x_sub: + $LD $a0,$SIZE_T*1($np) + $LD $acc0,$SIZE_T*1($tp) + $LD $a1,$SIZE_T*2($np) + $LD $acc1,$SIZE_T*2($tp) + subfe $t2,$a2,$acc2 + $LD $a2,$SIZE_T*3($np) + $LD $acc2,$SIZE_T*3($tp) + subfe $t3,$a3,$acc3 + $LD $a3,$SIZE_T*4($np) + $LD $acc3,$SIZE_T*4($tp) + $ST $t0,$SIZE_T*1($rp) + subfe $t0,$a4,$acc4 + $LD $a4,$SIZE_T*5($np) + $LD $acc4,$SIZE_T*5($tp) + $ST $t1,$SIZE_T*2($rp) + subfe $t1,$a5,$acc5 + $LD $a5,$SIZE_T*6($np) + $LD $acc5,$SIZE_T*6($tp) + $ST $t2,$SIZE_T*3($rp) + subfe $t2,$a6,$acc6 + $LD $a6,$SIZE_T*7($np) + $LD $acc6,$SIZE_T*7($tp) + $ST $t3,$SIZE_T*4($rp) + subfe $t3,$a7,$acc7 + $LDU $a7,$SIZE_T*8($np) + $LDU $acc7,$SIZE_T*8($tp) + $ST $t0,$SIZE_T*5($rp) + subfe $t0,$a0,$acc0 + $ST $t1,$SIZE_T*6($rp) + subfe $t1,$a1,$acc1 + $ST $t2,$SIZE_T*7($rp) + $STU $t3,$SIZE_T*8($rp) + bdnz .Lsqr8x_sub + + srwi $cnt,$num,`log($SIZE_T)/log(2)+2` + $LD $a0,$SIZE_T*1($ap_end) # original $rp + $LD $acc0,$SIZE_T*1($n0) # original $tp + subi $cnt,$cnt,1 + $LD $a1,$SIZE_T*2($ap_end) + $LD $acc1,$SIZE_T*2($n0) + subfe $t2,$a2,$acc2 + $LD $a2,$SIZE_T*3($ap_end) + $LD $acc2,$SIZE_T*3($n0) + subfe $t3,$a3,$acc3 + $LD $a3,$SIZE_T*4($ap_end) + $LDU $acc3,$SIZE_T*4($n0) + $ST $t0,$SIZE_T*1($rp) + subfe $t0,$a4,$acc4 + $ST $t1,$SIZE_T*2($rp) + subfe $t1,$a5,$acc5 + $ST $t2,$SIZE_T*3($rp) + subfe $t2,$a6,$acc6 + $ST $t3,$SIZE_T*4($rp) + subfe $t3,$a7,$acc7 + $ST $t0,$SIZE_T*5($rp) + subfe $carry,$zero,$carry # did it borrow? + $ST $t1,$SIZE_T*6($rp) + $ST $t2,$SIZE_T*7($rp) + $ST $t3,$SIZE_T*8($rp) + + addi $tp,$sp,$SIZE_T*11 + mtctr $cnt + +.Lsqr4x_cond_copy: + andc $a0,$a0,$carry + $ST $zero,-$SIZE_T*3($n0) # wipe stack clean + and $acc0,$acc0,$carry + $ST $zero,-$SIZE_T*2($n0) + andc $a1,$a1,$carry + $ST $zero,-$SIZE_T*1($n0) + and $acc1,$acc1,$carry + $ST $zero,-$SIZE_T*0($n0) + andc $a2,$a2,$carry + $ST $zero,$SIZE_T*1($tp) + and $acc2,$acc2,$carry + $ST $zero,$SIZE_T*2($tp) + andc $a3,$a3,$carry + $ST $zero,$SIZE_T*3($tp) + and $acc3,$acc3,$carry + $STU $zero,$SIZE_T*4($tp) + or $t0,$a0,$acc0 + $LD $a0,$SIZE_T*5($ap_end) + $LD $acc0,$SIZE_T*1($n0) + or $t1,$a1,$acc1 + $LD $a1,$SIZE_T*6($ap_end) + $LD $acc1,$SIZE_T*2($n0) + or $t2,$a2,$acc2 + $LD $a2,$SIZE_T*7($ap_end) + $LD $acc2,$SIZE_T*3($n0) + or $t3,$a3,$acc3 + $LD $a3,$SIZE_T*8($ap_end) + $LDU $acc3,$SIZE_T*4($n0) + $ST $t0,$SIZE_T*1($ap_end) + $ST $t1,$SIZE_T*2($ap_end) + $ST $t2,$SIZE_T*3($ap_end) + $STU $t3,$SIZE_T*4($ap_end) + bdnz .Lsqr4x_cond_copy + + $POP $ap,0($sp) # pull saved sp + andc $a0,$a0,$carry + and $acc0,$acc0,$carry + andc $a1,$a1,$carry + and $acc1,$acc1,$carry + andc $a2,$a2,$carry + and $acc2,$acc2,$carry + andc $a3,$a3,$carry + and $acc3,$acc3,$carry + or $t0,$a0,$acc0 + or $t1,$a1,$acc1 + or $t2,$a2,$acc2 + or $t3,$a3,$acc3 + $ST $t0,$SIZE_T*1($ap_end) + $ST $t1,$SIZE_T*2($ap_end) + $ST $t2,$SIZE_T*3($ap_end) + $ST $t3,$SIZE_T*4($ap_end) + + b .Lsqr8x_done + +.align 5 +.Lsqr8x8_post_condition: + $POP $rp,$SIZE_T*6($sp) # pull rp + $POP $ap,0($sp) # pull saved sp + addze $carry,$zero + + # $acc0-7,$carry hold result, $a0-7 hold modulus + subfc $acc0,$a0,$acc0 + subfe $acc1,$a1,$acc1 + $ST $zero,$SIZE_T*12($sp) # wipe stack clean + $ST $zero,$SIZE_T*13($sp) + subfe $acc2,$a2,$acc2 + $ST $zero,$SIZE_T*14($sp) + $ST $zero,$SIZE_T*15($sp) + subfe $acc3,$a3,$acc3 + $ST $zero,$SIZE_T*16($sp) + $ST $zero,$SIZE_T*17($sp) + subfe $acc4,$a4,$acc4 + $ST $zero,$SIZE_T*18($sp) + $ST $zero,$SIZE_T*19($sp) + subfe $acc5,$a5,$acc5 + $ST $zero,$SIZE_T*20($sp) + $ST $zero,$SIZE_T*21($sp) + subfe $acc6,$a6,$acc6 + $ST $zero,$SIZE_T*22($sp) + $ST $zero,$SIZE_T*23($sp) + subfe $acc7,$a7,$acc7 + $ST $zero,$SIZE_T*24($sp) + $ST $zero,$SIZE_T*25($sp) + subfe $carry,$zero,$carry # did it borrow? + $ST $zero,$SIZE_T*26($sp) + $ST $zero,$SIZE_T*27($sp) + + and $a0,$a0,$carry + and $a1,$a1,$carry + addc $acc0,$acc0,$a0 # add modulus back if borrowed + and $a2,$a2,$carry + adde $acc1,$acc1,$a1 + and $a3,$a3,$carry + adde $acc2,$acc2,$a2 + and $a4,$a4,$carry + adde $acc3,$acc3,$a3 + and $a5,$a5,$carry + adde $acc4,$acc4,$a4 + and $a6,$a6,$carry + adde $acc5,$acc5,$a5 + and $a7,$a7,$carry + adde $acc6,$acc6,$a6 + adde $acc7,$acc7,$a7 + $ST $acc0,$SIZE_T*1($rp) + $ST $acc1,$SIZE_T*2($rp) + $ST $acc2,$SIZE_T*3($rp) + $ST $acc3,$SIZE_T*4($rp) + $ST $acc4,$SIZE_T*5($rp) + $ST $acc5,$SIZE_T*6($rp) + $ST $acc6,$SIZE_T*7($rp) + $ST $acc7,$SIZE_T*8($rp) + +.Lsqr8x_done: + $PUSH $zero,$SIZE_T*8($sp) + $PUSH $zero,$SIZE_T*10($sp) + + $POP r14,-$SIZE_T*18($ap) + li r3,1 # signal "done" + $POP r15,-$SIZE_T*17($ap) + $POP r16,-$SIZE_T*16($ap) + $POP r17,-$SIZE_T*15($ap) + $POP r18,-$SIZE_T*14($ap) + $POP r19,-$SIZE_T*13($ap) + $POP r20,-$SIZE_T*12($ap) + $POP r21,-$SIZE_T*11($ap) + $POP r22,-$SIZE_T*10($ap) + $POP r23,-$SIZE_T*9($ap) + $POP r24,-$SIZE_T*8($ap) + $POP r25,-$SIZE_T*7($ap) + $POP r26,-$SIZE_T*6($ap) + $POP r27,-$SIZE_T*5($ap) + $POP r28,-$SIZE_T*4($ap) + $POP r29,-$SIZE_T*3($ap) + $POP r30,-$SIZE_T*2($ap) + $POP r31,-$SIZE_T*1($ap) + mr $sp,$ap + blr + .long 0 + .byte 0,12,4,0x20,0x80,18,6,0 + .long 0 +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +___ +} +$code.=<<___; .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>" ___ diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc.pl b/deps/openssl/openssl/crypto/bn/asm/ppc.pl index 4ea534a1c7..e37068192f 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -38,9 +38,9 @@ #rsa 2048 bits 0.3036s 0.0085s 3.3 117.1 #rsa 4096 bits 2.0040s 0.0299s 0.5 33.4 #dsa 512 bits 0.0087s 0.0106s 114.3 94.5 -#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 +#dsa 1024 bits 0.0256s 0.0313s 39.0 32.0 # -# Same bechmark with this assembler code: +# Same benchmark with this assembler code: # #rsa 512 bits 0.0056s 0.0005s 178.6 2049.2 #rsa 1024 bits 0.0283s 0.0015s 35.3 674.1 @@ -74,7 +74,7 @@ #rsa 4096 bits 0.3700s 0.0058s 2.7 171.0 #dsa 512 bits 0.0016s 0.0020s 610.7 507.1 #dsa 1024 bits 0.0047s 0.0058s 212.5 173.2 -# +# # Again, performance increases by at about 75% # # Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code) @@ -101,10 +101,7 @@ #dsa 2048 bits 0.0061s 0.0075s 163.5 132.8 # # Performance increase of ~60% -# -# If you have comments or suggestions to improve code send -# me a note at schari@us.ibm.com -# +# Based on submission from Suresh N. Chari of IBM $flavour = shift; @@ -125,7 +122,7 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzw"; # count leading zeros $SHL= "slw"; # shift left $SHR= "srw"; # unsigned shift right - $SHRI= "srwi"; # unsigned shift right by immediate + $SHRI= "srwi"; # unsigned shift right by immediate $SHLI= "slwi"; # shift left by immediate $CLRU= "clrlwi"; # clear upper bits $INSR= "insrwi"; # insert right @@ -149,10 +146,10 @@ if ($flavour =~ /32/) { $CNTLZ= "cntlzd"; # count leading zeros $SHL= "sld"; # shift left $SHR= "srd"; # unsigned shift right - $SHRI= "srdi"; # unsigned shift right by immediate + $SHRI= "srdi"; # unsigned shift right by immediate $SHLI= "sldi"; # shift left by immediate $CLRU= "clrldi"; # clear upper bits - $INSR= "insrdi"; # insert right + $INSR= "insrdi"; # insert right $ROTL= "rotldi"; # rotate left by immediate $TR= "td"; # conditional trap } else { die "nonsense $flavour"; } @@ -189,7 +186,7 @@ $data=<<EOF; # below. # 12/05/03 Suresh Chari # (with lots of help from) Andy Polyakov -## +## # 1. Initial version 10/20/02 Suresh Chari # # @@ -202,7 +199,7 @@ $data=<<EOF; # be done in the build process. # # Hand optimized assembly code for the following routines -# +# # bn_sqr_comba4 # bn_sqr_comba8 # bn_mul_comba4 @@ -225,10 +222,10 @@ $data=<<EOF; #-------------------------------------------------------------------------- # # Defines to be used in the assembly code. -# +# #.set r0,0 # we use it as storage for value of 0 #.set SP,1 # preserved -#.set RTOC,2 # preserved +#.set RTOC,2 # preserved #.set r3,3 # 1st argument/return value #.set r4,4 # 2nd argument/volatile register #.set r5,5 # 3rd argument/volatile register @@ -246,7 +243,7 @@ $data=<<EOF; # the first . i.e. for example change ".bn_sqr_comba4" # to "bn_sqr_comba4". This should be automatically done # in the build. - + .globl .bn_sqr_comba4 .globl .bn_sqr_comba8 .globl .bn_mul_comba4 @@ -257,9 +254,9 @@ $data=<<EOF; .globl .bn_sqr_words .globl .bn_mul_words .globl .bn_mul_add_words - + # .text section - + .machine "any" # @@ -278,8 +275,8 @@ $data=<<EOF; # r3 contains r # r4 contains a # -# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: -# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# # r5,r6 are the two BN_ULONGs being multiplied. # r7,r8 are the results of the 32x32 giving 64 bit multiply. # r9,r10, r11 are the equivalents of c1,c2, c3. @@ -288,10 +285,10 @@ $data=<<EOF; # xor r0,r0,r0 # set r0 = 0. Used in the addze # instructions below - + #sqr_add_c(a,0,c1,c2,c3) - $LD r5,`0*$BNSZ`(r4) - $UMULL r9,r5,r5 + $LD r5,`0*$BNSZ`(r4) + $UMULL r9,r5,r5 $UMULH r10,r5,r5 #in first iteration. No need #to add since c1=c2=c3=0. # Note c3(r11) is NOT set to 0 @@ -299,20 +296,20 @@ $data=<<EOF; $ST r9,`0*$BNSZ`(r3) # r[0]=c1; # sqr_add_c2(a,1,0,c2,c3,c1); - $LD r6,`1*$BNSZ`(r4) + $LD r6,`1*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 # compute (r7,r8)=2*(r7,r8) adde r8,r8,r8 addze r9,r0 # catch carry if any. - # r9= r0(=0) and carry - + # r9= r0(=0) and carry + addc r10,r7,r10 # now add to temp result. - addze r11,r8 # r8 added to r11 which is 0 + addze r11,r8 # r8 added to r11 which is 0 addze r9,r9 - - $ST r10,`1*$BNSZ`(r3) #r[1]=c2; + + $ST r10,`1*$BNSZ`(r3) #r[1]=c2; #sqr_add_c(a,1,c3,c1,c2) $UMULL r7,r6,r6 $UMULH r8,r6,r6 @@ -323,23 +320,23 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 adde r8,r8,r8 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - $ST r11,`2*$BNSZ`(r3) #r[2]=c3 + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 #sqr_add_c2(a,3,0,c1,c2,c3); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r11,r0 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -348,7 +345,7 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r7,r7,r7 adde r8,r8,r8 addze r11,r11 @@ -363,31 +360,31 @@ $data=<<EOF; adde r11,r8,r11 addze r9,r0 #sqr_add_c2(a,3,1,c2,c3,c1); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 $ST r10,`4*$BNSZ`(r3) #r[4]=c2 #sqr_add_c2(a,3,2,c3,c1,c2); - $LD r5,`2*$BNSZ`(r4) + $LD r5,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 addc r7,r7,r7 adde r8,r8,r8 addze r10,r0 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 $ST r11,`5*$BNSZ`(r3) #r[5] = c3 #sqr_add_c(a,3,c1,c2,c3); - $UMULL r7,r6,r6 + $UMULL r7,r6,r6 $UMULH r8,r6,r6 addc r9,r7,r9 adde r10,r8,r10 @@ -406,7 +403,7 @@ $data=<<EOF; # for the gcc compiler. This should be automatically # done in the build # - + .align 4 .bn_sqr_comba8: # @@ -418,15 +415,15 @@ $data=<<EOF; # r3 contains r # r4 contains a # -# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: -# +# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows: +# # r5,r6 are the two BN_ULONGs being multiplied. # r7,r8 are the results of the 32x32 giving 64 bit multiply. # r9,r10, r11 are the equivalents of c1,c2, c3. # # Possible optimization of loading all 8 longs of a into registers # doesn't provide any speedup -# +# xor r0,r0,r0 #set r0 = 0.Used in addze #instructions below. @@ -439,18 +436,18 @@ $data=<<EOF; #sqr_add_c2(a,1,0,c2,c3,c1); $LD r6,`1*$BNSZ`(r4) $UMULL r7,r5,r6 - $UMULH r8,r5,r6 - + $UMULH r8,r5,r6 + addc r10,r7,r10 #add the two register number adde r11,r8,r0 # (r8,r7) to the three register addze r9,r0 # number (r9,r11,r10).NOTE:r0=0 - + addc r10,r7,r10 #add the two register number adde r11,r8,r11 # (r8,r7) to the three register addze r9,r9 # number (r9,r11,r10). - + $ST r10,`1*$BNSZ`(r3) # r[1]=c2 - + #sqr_add_c(a,1,c3,c1,c2); $UMULL r7,r6,r6 $UMULH r8,r6,r6 @@ -461,25 +458,25 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + $ST r11,`2*$BNSZ`(r3) #r[2]=c3 #sqr_add_c2(a,3,0,c1,c2,c3); $LD r6,`3*$BNSZ`(r4) #r6 = a[3]. r5 is already a[0]. $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r0 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -488,20 +485,20 @@ $data=<<EOF; $LD r6,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + $ST r9,`3*$BNSZ`(r3) #r[3]=c1; #sqr_add_c(a,2,c2,c3,c1); $UMULL r7,r6,r6 $UMULH r8,r6,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r0 @@ -509,11 +506,11 @@ $data=<<EOF; $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -522,11 +519,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -535,11 +532,11 @@ $data=<<EOF; $LD r6,`5*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r0 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -548,11 +545,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -561,11 +558,11 @@ $data=<<EOF; $LD r6,`3*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -580,11 +577,11 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -593,11 +590,11 @@ $data=<<EOF; $LD r6,`5*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 @@ -617,7 +614,7 @@ $data=<<EOF; $LD r6,`7*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r0 @@ -629,7 +626,7 @@ $data=<<EOF; $LD r6,`6*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -652,7 +649,7 @@ $data=<<EOF; $LD r6,`4*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 @@ -684,7 +681,7 @@ $data=<<EOF; addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 - + addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 @@ -704,7 +701,7 @@ $data=<<EOF; $LD r5,`2*$BNSZ`(r4) $UMULL r7,r5,r6 $UMULH r8,r5,r6 - + addc r9,r7,r9 adde r10,r8,r10 addze r11,r0 @@ -801,7 +798,7 @@ $data=<<EOF; adde r10,r8,r10 addze r11,r11 $ST r9,`12*$BNSZ`(r3) #r[12]=c1; - + #sqr_add_c2(a,7,6,c2,c3,c1) $LD r5,`6*$BNSZ`(r4) $UMULL r7,r5,r6 @@ -850,21 +847,21 @@ $data=<<EOF; # xor r0,r0,r0 #r0=0. Used in addze below. #mul_add_c(a[0],b[0],c1,c2,c3); - $LD r6,`0*$BNSZ`(r4) - $LD r7,`0*$BNSZ`(r5) - $UMULL r10,r6,r7 - $UMULH r11,r6,r7 + $LD r6,`0*$BNSZ`(r4) + $LD r7,`0*$BNSZ`(r5) + $UMULL r10,r6,r7 + $UMULH r11,r6,r7 $ST r10,`0*$BNSZ`(r3) #r[0]=c1 #mul_add_c(a[0],b[1],c2,c3,c1); - $LD r7,`1*$BNSZ`(r5) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 adde r12,r9,r0 addze r10,r0 #mul_add_c(a[1],b[0],c2,c3,c1); - $LD r6, `1*$BNSZ`(r4) - $LD r7, `0*$BNSZ`(r5) + $LD r6, `1*$BNSZ`(r4) + $LD r7, `0*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 @@ -872,23 +869,23 @@ $data=<<EOF; addze r10,r10 $ST r11,`1*$BNSZ`(r3) #r[1]=c2 #mul_add_c(a[2],b[0],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 adde r10,r9,r10 addze r11,r0 #mul_add_c(a[1],b[1],c3,c1,c2); - $LD r6,`1*$BNSZ`(r4) - $LD r7,`1*$BNSZ`(r5) + $LD r6,`1*$BNSZ`(r4) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 adde r10,r9,r10 addze r11,r11 #mul_add_c(a[0],b[2],c3,c1,c2); - $LD r6,`0*$BNSZ`(r4) - $LD r7,`2*$BNSZ`(r5) + $LD r6,`0*$BNSZ`(r4) + $LD r7,`2*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 @@ -896,7 +893,7 @@ $data=<<EOF; addze r11,r11 $ST r12,`2*$BNSZ`(r3) #r[2]=c3 #mul_add_c(a[0],b[3],c1,c2,c3); - $LD r7,`3*$BNSZ`(r5) + $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r10,r8,r10 @@ -928,7 +925,7 @@ $data=<<EOF; addze r12,r12 $ST r10,`3*$BNSZ`(r3) #r[3]=c1 #mul_add_c(a[3],b[1],c2,c3,c1); - $LD r7,`1*$BNSZ`(r5) + $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r11,r8,r11 @@ -952,7 +949,7 @@ $data=<<EOF; addze r10,r10 $ST r11,`4*$BNSZ`(r3) #r[4]=c2 #mul_add_c(a[2],b[3],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r12,r8,r12 @@ -968,7 +965,7 @@ $data=<<EOF; addze r11,r11 $ST r12,`5*$BNSZ`(r3) #r[5]=c3 #mul_add_c(a[3],b[3],c1,c2,c3); - $LD r7,`3*$BNSZ`(r5) + $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 addc r10,r8,r10 @@ -988,7 +985,7 @@ $data=<<EOF; # for the gcc compiler. This should be automatically # done in the build # - + .align 4 .bn_mul_comba8: # @@ -1003,7 +1000,7 @@ $data=<<EOF; # r10, r11, r12 are the equivalents of c1, c2, and c3. # xor r0,r0,r0 #r0=0. Used in addze below. - + #mul_add_c(a[0],b[0],c1,c2,c3); $LD r6,`0*$BNSZ`(r4) #a[0] $LD r7,`0*$BNSZ`(r5) #b[0] @@ -1065,7 +1062,7 @@ $data=<<EOF; addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 - + #mul_add_c(a[2],b[1],c1,c2,c3); $LD r6,`2*$BNSZ`(r4) $LD r7,`1*$BNSZ`(r5) @@ -1131,7 +1128,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r0 #mul_add_c(a[1],b[4],c3,c1,c2); - $LD r6,`1*$BNSZ`(r4) + $LD r6,`1*$BNSZ`(r4) $LD r7,`4*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1139,7 +1136,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[2],b[3],c3,c1,c2); - $LD r6,`2*$BNSZ`(r4) + $LD r6,`2*$BNSZ`(r4) $LD r7,`3*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1147,7 +1144,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[3],b[2],c3,c1,c2); - $LD r6,`3*$BNSZ`(r4) + $LD r6,`3*$BNSZ`(r4) $LD r7,`2*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1155,7 +1152,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[4],b[1],c3,c1,c2); - $LD r6,`4*$BNSZ`(r4) + $LD r6,`4*$BNSZ`(r4) $LD r7,`1*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1163,7 +1160,7 @@ $data=<<EOF; adde r10,r10,r9 addze r11,r11 #mul_add_c(a[5],b[0],c3,c1,c2); - $LD r6,`5*$BNSZ`(r4) + $LD r6,`5*$BNSZ`(r4) $LD r7,`0*$BNSZ`(r5) $UMULL r8,r6,r7 $UMULH r9,r6,r7 @@ -1555,7 +1552,7 @@ $data=<<EOF; addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ mtctr r6 -Lppcasm_sub_mainloop: +Lppcasm_sub_mainloop: $LDU r7,$BNSZ(r4) $LDU r8,$BNSZ(r5) subfe r6,r8,r7 # r6 = r7+carry bit + onescomplement(r8) @@ -1563,7 +1560,7 @@ Lppcasm_sub_mainloop: # is r7-r8 -1 as we need. $STU r6,$BNSZ(r3) bdnz Lppcasm_sub_mainloop -Lppcasm_sub_adios: +Lppcasm_sub_adios: subfze r3,r0 # if carry bit is set then r3 = 0 else -1 andi. r3,r3,1 # keep only last bit. blr @@ -1604,13 +1601,13 @@ Lppcasm_sub_adios: addi r3,r3,-$BNSZ addi r5,r5,-$BNSZ mtctr r6 -Lppcasm_add_mainloop: +Lppcasm_add_mainloop: $LDU r7,$BNSZ(r4) $LDU r8,$BNSZ(r5) adde r8,r7,r8 $STU r8,$BNSZ(r3) bdnz Lppcasm_add_mainloop -Lppcasm_add_adios: +Lppcasm_add_adios: addze r3,r0 #return carry bit. blr .long 0 @@ -1633,11 +1630,11 @@ Lppcasm_add_adios: # the PPC instruction to count leading zeros instead # of call to num_bits_word. Since this was compiled # only at level -O2 we can possibly squeeze it more? -# +# # r3 = h # r4 = l # r5 = d - + $UCMPI 0,r5,0 # compare r5 and 0 bne Lppcasm_div1 # proceed if d!=0 li r3,-1 # d=0 return -1 @@ -1653,7 +1650,7 @@ Lppcasm_div1: Lppcasm_div2: $UCMP 0,r3,r5 #h>=d? blt Lppcasm_div3 #goto Lppcasm_div3 if not - subf r3,r5,r3 #h-=d ; + subf r3,r5,r3 #h-=d ; Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i cmpi 0,0,r7,0 # is (i == 0)? beq Lppcasm_div4 @@ -1668,7 +1665,7 @@ Lppcasm_div4: # as it saves registers. li r6,2 #r6=2 mtctr r6 #counter will be in count. -Lppcasm_divouterloop: +Lppcasm_divouterloop: $SHRI r8,r3,`$BITS/2` #r8 = (h>>BN_BITS4) $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 # compute here for innerloop. @@ -1676,7 +1673,7 @@ Lppcasm_divouterloop: bne Lppcasm_div5 # goto Lppcasm_div5 if not li r8,-1 - $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l + $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l b Lppcasm_div6 Lppcasm_div5: $UDIV r8,r3,r9 #q = h/dh @@ -1684,7 +1681,7 @@ Lppcasm_div6: $UMULL r12,r9,r8 #th = q*dh $CLRU r10,r5,`$BITS/2` #r10=dl $UMULL r6,r8,r10 #tl = q*dl - + Lppcasm_divinnerloop: subf r10,r12,r3 #t = h -th $SHRI r7,r10,`$BITS/2` #r7= (t &BN_MASK2H), sort of... @@ -1761,7 +1758,7 @@ Lppcasm_div9: addi r4,r4,-$BNSZ addi r3,r3,-$BNSZ mtctr r5 -Lppcasm_sqr_mainloop: +Lppcasm_sqr_mainloop: #sqr(r[0],r[1],a[0]); $LDU r6,$BNSZ(r4) $UMULL r7,r6,r6 @@ -1769,7 +1766,7 @@ Lppcasm_sqr_mainloop: $STU r7,$BNSZ(r3) $STU r8,$BNSZ(r3) bdnz Lppcasm_sqr_mainloop -Lppcasm_sqr_adios: +Lppcasm_sqr_adios: blr .long 0 .byte 0,12,0x14,0,0,0,3,0 @@ -1783,7 +1780,7 @@ Lppcasm_sqr_adios: # done in the build # -.align 4 +.align 4 .bn_mul_words: # # BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) @@ -1797,7 +1794,7 @@ Lppcasm_sqr_adios: rlwinm. r7,r5,30,2,31 # num >> 2 beq Lppcasm_mw_REM mtctr r7 -Lppcasm_mw_LOOP: +Lppcasm_mw_LOOP: #mul(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1809,7 +1806,7 @@ Lppcasm_mw_LOOP: #using adde. $ST r9,`0*$BNSZ`(r3) #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r11,r6,r8 $UMULH r12,r6,r8 adde r11,r11,r10 @@ -1830,7 +1827,7 @@ Lppcasm_mw_LOOP: addze r12,r12 #this spin we collect carry into #r12 $ST r11,`3*$BNSZ`(r3) - + addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_mw_LOOP @@ -1846,25 +1843,25 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`0*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 addc r9,r9,r12 addze r10,r10 $ST r9,`1*$BNSZ`(r3) addi r12,r10,0 - + addi r5,r5,-1 cmpli 0,0,r5,0 beq Lppcasm_mw_OVER - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1873,14 +1870,14 @@ Lppcasm_mw_REM: addze r10,r10 $ST r9,`2*$BNSZ`(r3) addi r12,r10,0 - -Lppcasm_mw_OVER: + +Lppcasm_mw_OVER: addi r3,r12,0 blr .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 -.size bn_mul_words,.-bn_mul_words +.size .bn_mul_words,.-.bn_mul_words # # NOTE: The following label name should be changed to @@ -1902,11 +1899,11 @@ Lppcasm_mw_OVER: # empirical evidence suggests that unrolled version performs best!! # xor r0,r0,r0 #r0 = 0 - xor r12,r12,r12 #r12 = 0 . used for carry + xor r12,r12,r12 #r12 = 0 . used for carry rlwinm. r7,r5,30,2,31 # num >> 2 beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover mtctr r7 -Lppcasm_maw_mainloop: +Lppcasm_maw_mainloop: #mul_add(rp[0],ap[0],w,c1); $LD r8,`0*$BNSZ`(r4) $LD r11,`0*$BNSZ`(r3) @@ -1922,9 +1919,9 @@ Lppcasm_maw_mainloop: #by multiply and will be collected #in the next spin $ST r9,`0*$BNSZ`(r3) - + #mul_add(rp[1],ap[1],w,c1); - $LD r8,`1*$BNSZ`(r4) + $LD r8,`1*$BNSZ`(r4) $LD r9,`1*$BNSZ`(r3) $UMULL r11,r6,r8 $UMULH r12,r6,r8 @@ -1933,7 +1930,7 @@ Lppcasm_maw_mainloop: addc r11,r11,r9 #addze r12,r12 $ST r11,`1*$BNSZ`(r3) - + #mul_add(rp[2],ap[2],w,c1); $LD r8,`2*$BNSZ`(r4) $UMULL r9,r6,r8 @@ -1944,7 +1941,7 @@ Lppcasm_maw_mainloop: addc r9,r9,r11 #addze r10,r10 $ST r9,`2*$BNSZ`(r3) - + #mul_add(rp[3],ap[3],w,c1); $LD r8,`3*$BNSZ`(r4) $UMULL r11,r6,r8 @@ -1958,7 +1955,7 @@ Lppcasm_maw_mainloop: addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` bdnz Lppcasm_maw_mainloop - + Lppcasm_maw_leftover: andi. r5,r5,0x3 beq Lppcasm_maw_adios @@ -1975,10 +1972,10 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[1],ap[1],w,c1); - $LDU r8,$BNSZ(r4) + $LDU r8,$BNSZ(r4) $UMULL r9,r6,r8 $UMULH r10,r6,r8 $LDU r11,$BNSZ(r3) @@ -1987,7 +1984,7 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - + bdz Lppcasm_maw_adios #mul_add(rp[2],ap[2],w,c1); $LDU r8,$BNSZ(r4) @@ -1999,8 +1996,8 @@ Lppcasm_maw_leftover: addc r9,r9,r12 addze r12,r10 $ST r9,0(r3) - -Lppcasm_maw_adios: + +Lppcasm_maw_adios: addi r3,r12,0 blr .long 0 diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl index 5d9f43aa5d..c41b620bc2 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl @@ -35,7 +35,7 @@ # key lengths. As it's obviously inappropriate as "best all-round" # alternative, it has to be complemented with run-time CPU family # detection. Oh! It should also be noted that unlike other PowerPC -# implementation IALU ppc-mont.pl module performs *suboptimaly* on +# implementation IALU ppc-mont.pl module performs *suboptimally* on # >=1024-bit key lengths on Power 6. It should also be noted that # *everything* said so far applies to 64-bit builds! As far as 32-bit # application executed on 64-bit CPU goes, this module is likely to @@ -1353,7 +1353,7 @@ $code.=<<___; std $t3,-16($tp) ; tp[j-1] std $t5,-8($tp) ; tp[j] - add $carry,$carry,$ovf ; comsume upmost overflow + add $carry,$carry,$ovf ; consume upmost overflow add $t6,$t6,$carry ; can not overflow srdi $carry,$t6,16 add $t7,$t7,$carry diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl index 0466e11a25..f1292cc75c 100755 --- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl @@ -1,68 +1,30 @@ #! /usr/bin/env perl # Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. +# Copyright (c) 2012, Intel Corporation. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html - - -############################################################################## -# # -# Copyright (c) 2012, Intel Corporation # -# # -# All rights reserved. # -# # -# Redistribution and use in source and binary forms, with or without # -# modification, are permitted provided that the following conditions are # -# met: # -# # -# * Redistributions of source code must retain the above copyright # -# notice, this list of conditions and the following disclaimer. # -# # -# * Redistributions in binary form must reproduce the above copyright # -# notice, this list of conditions and the following disclaimer in the # -# documentation and/or other materials provided with the # -# distribution. # -# # -# * Neither the name of the Intel Corporation nor the names of its # -# contributors may be used to endorse or promote products derived from # -# this software without specific prior written permission. # -# # -# # -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -# # -############################################################################## -# Developers and authors: # -# Shay Gueron (1, 2), and Vlad Krasnov (1) # -# (1) Intel Corporation, Israel Development Center, Haifa, Israel # -# (2) University of Haifa, Israel # -############################################################################## -# Reference: # -# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular # -# Exponentiation, Using Advanced Vector Instructions Architectures", # -# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, # -# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 # -# [2] S. Gueron: "Efficient Software Implementations of Modular # -# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). # -# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE # -# Proceedings of 9th International Conference on Information Technology: # -# New Generations (ITNG 2012), pp.821-823 (2012) # -# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # -# resistant 1024-bit modular exponentiation, for optimizing RSA2048 # -# on AVX2 capable x86_64 platforms", # -# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest# -############################################################################## +# +# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) +# (1) Intel Corporation, Israel Development Center, Haifa, Israel +# (2) University of Haifa, Israel +# +# References: +# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular +# Exponentiation, Using Advanced Vector Instructions Architectures", +# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, +# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 +# [2] S. Gueron: "Efficient Software Implementations of Modular +# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). +# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE +# Proceedings of 9th International Conference on Information Technology: +# New Generations (ITNG 2012), pp.821-823 (2012) +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis +# resistant 1024-bit modular exponentiation, for optimizing RSA2048 +# on AVX2 capable x86_64 platforms", +# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest # # +13% improvement over original submission by <appro@openssl.org> # @@ -168,13 +130,21 @@ $code.=<<___; .type rsaz_1024_sqr_avx2,\@function,5 .align 64 rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 vzeroupper ___ $code.=<<___ if ($win64); @@ -193,6 +163,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp mov %rdx, $np # reassigned argument sub \$$FrameSize, %rsp mov $np, $tmp @@ -382,7 +353,7 @@ $code.=<<___; vpaddq $TEMP1, $ACC1, $ACC1 vpmuludq 32*7-128($aap), $B2, $ACC2 vpbroadcastq 32*5-128($tpa), $B2 - vpaddq 32*11-448($tp1), $ACC2, $ACC2 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 vmovdqu $ACC6, 32*6-192($tp0) vmovdqu $ACC7, 32*7-192($tp0) @@ -441,7 +412,7 @@ $code.=<<___; vmovdqu $ACC7, 32*16-448($tp1) lea 8($tp1), $tp1 - dec $i + dec $i jnz .LOOP_SQR_1024 ___ $ZERO = $ACC9; @@ -786,7 +757,7 @@ $code.=<<___; vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 vpaddq $TEMP3, $ACC7, $ACC7 vpaddq $TEMP4, $ACC8, $ACC8 - + vpsrlq \$29, $ACC4, $TEMP1 vpand $AND_MASK, $ACC4, $ACC4 vpsrlq \$29, $ACC5, $TEMP2 @@ -825,8 +796,10 @@ $code.=<<___; vzeroall mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); +.Lsqr_1024_in_tail: movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 @@ -840,14 +813,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lsqr_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 ___ } @@ -900,13 +881,21 @@ $code.=<<___; .type rsaz_1024_mul_avx2,\@function,5 .align 64 rsaz_1024_mul_avx2: +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); vzeroupper @@ -925,6 +914,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp vzeroall mov %rdx, $bp # reassigned argument sub \$64,%rsp @@ -1450,15 +1440,17 @@ $code.=<<___; vpaddq $TEMP4, $ACC8, $ACC8 vmovdqu $ACC4, 128-128($rp) - vmovdqu $ACC5, 160-128($rp) + vmovdqu $ACC5, 160-128($rp) vmovdqu $ACC6, 192-128($rp) vmovdqu $ACC7, 224-128($rp) vmovdqu $ACC8, 256-128($rp) vzeroupper mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); +.Lmul_1024_in_tail: movaps -0xd8(%rax),%xmm6 movaps -0xc8(%rax),%xmm7 movaps -0xb8(%rax),%xmm8 @@ -1472,14 +1464,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lmul_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 ___ } @@ -1598,8 +1598,10 @@ rsaz_1024_scatter5_avx2: .type rsaz_1024_gather5_avx2,\@abi-omnipotent .align 32 rsaz_1024_gather5_avx2: +.cfi_startproc vzeroupper mov %rsp,%r11 +.cfi_def_cfa_register %r11 ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax @@ -1737,11 +1739,13 @@ $code.=<<___ if ($win64); movaps -0x38(%r11),%xmm13 movaps -0x28(%r11),%xmm14 movaps -0x18(%r11),%xmm15 -.LSEH_end_rsaz_1024_gather5: ___ $code.=<<___; lea (%r11),%rsp +.cfi_def_cfa_register %rsp ret +.cfi_endproc +.LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ } @@ -1814,14 +1818,17 @@ rsaz_se_handler: cmp %r10,%rbx # context->Rip<prologue label jb .Lcommon_seh_tail - mov 152($context),%rax # pull context->Rsp - mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail - mov 160($context),%rax # pull context->Rbp + mov 160($context),%rbp # pull context->Rbp + + mov 8(%r11),%r10d # HandlerData[2] + lea (%rsi,%r10),%r10 # "in tail" label + cmp %r10,%rbx # context->Rip>="in tail" label + cmovc %rbp,%rax mov -48(%rax),%r15 mov -40(%rax),%r14 @@ -1899,11 +1906,13 @@ rsaz_se_handler: .LSEH_info_rsaz_1024_sqr_avx2: .byte 9,0,0,0 .rva rsaz_se_handler - .rva .Lsqr_1024_body,.Lsqr_1024_epilogue + .rva .Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail + .long 0 .LSEH_info_rsaz_1024_mul_avx2: .byte 9,0,0,0 .rva rsaz_se_handler - .rva .Lmul_1024_body,.Lmul_1024_epilogue + .rva .Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail + .long 0 .LSEH_info_rsaz_1024_gather5: .byte 0x01,0x36,0x17,0x0b .byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15 diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl index 6f3b664f7a..b1797b649f 100755 --- a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl @@ -1,68 +1,29 @@ #! /usr/bin/env perl # Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright (c) 2012, Intel Corporation. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html - - -############################################################################## -# # -# Copyright (c) 2012, Intel Corporation # -# # -# All rights reserved. # -# # -# Redistribution and use in source and binary forms, with or without # -# modification, are permitted provided that the following conditions are # -# met: # -# # -# * Redistributions of source code must retain the above copyright # -# notice, this list of conditions and the following disclaimer. # -# # -# * Redistributions in binary form must reproduce the above copyright # -# notice, this list of conditions and the following disclaimer in the # -# documentation and/or other materials provided with the # -# distribution. # -# # -# * Neither the name of the Intel Corporation nor the names of its # -# contributors may be used to endorse or promote products derived from # -# this software without specific prior written permission. # -# # -# # -# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -# # -############################################################################## -# Developers and authors: # -# Shay Gueron (1, 2), and Vlad Krasnov (1) # -# (1) Intel Architecture Group, Microprocessor and Chipset Development, # -# Israel Development Center, Haifa, Israel # -# (2) University of Haifa # -############################################################################## -# Reference: # -# [1] S. Gueron, "Efficient Software Implementations of Modular # -# Exponentiation", http://eprint.iacr.org/2011/239 # -# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". # -# IEEE Proceedings of 9th International Conference on Information # -# Technology: New Generations (ITNG 2012), 821-823 (2012). # -# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation# -# Journal of Cryptographic Engineering 2:31-43 (2012). # -# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # -# resistant 512-bit and 1024-bit modular exponentiation for optimizing # -# RSA1024 and RSA2048 on x86_64 platforms", # -# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest# -############################################################################## - +# +# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) +# (1) Intel Corporation, Israel Development Center, Haifa, Israel +# (2) University of Haifa, Israel +# +# References: +# [1] S. Gueron, "Efficient Software Implementations of Modular +# Exponentiation", http://eprint.iacr.org/2011/239 +# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". +# IEEE Proceedings of 9th International Conference on Information +# Technology: New Generations (ITNG 2012), 821-823 (2012). +# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation +# Journal of Cryptographic Engineering 2:31-43 (2012). +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis +# resistant 512-bit and 1024-bit modular exponentiation for optimizing +# RSA1024 and RSA2048 on x86_64 platforms", +# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest +# # While original submission covers 512- and 1024-bit exponentiation, # this module is limited to 512-bit version only (and as such # accelerates RSA1024 sign). This is because improvement for longer @@ -138,14 +99,22 @@ $code.=<<___; .type rsaz_512_sqr,\@function,5 .align 32 rsaz_512_sqr: # 25-29% faster than rsaz_512_mul +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lsqr_body: movq $mod, %rbp # common argument movq ($inp), %rdx @@ -282,9 +251,9 @@ $code.=<<___; movq %r9, 16(%rsp) movq %r10, 24(%rsp) shrq \$63, %rbx - + #third iteration - movq 16($inp), %r9 + movq 16($inp), %r9 movq 24($inp), %rax mulq %r9 addq %rax, %r12 @@ -532,7 +501,7 @@ $code.=<<___; movl $times,128+8(%rsp) movq $out, %xmm0 # off-load movq %rbp, %xmm1 # off-load -#first iteration +#first iteration mulx %rax, %r8, %r9 mulx 16($inp), %rcx, %r10 @@ -568,7 +537,7 @@ $code.=<<___; mov %rax, (%rsp) mov %r8, 8(%rsp) -#second iteration +#second iteration mulx 16($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -607,8 +576,8 @@ $code.=<<___; mov %r9, 16(%rsp) .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp) - -#third iteration + +#third iteration .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9 adox $out, %r12 adcx %r9, %r13 @@ -643,8 +612,8 @@ $code.=<<___; mov %r11, 32(%rsp) .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp) - -#fourth iteration + +#fourth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx adox %rax, %r14 adcx %rbx, %r15 @@ -676,8 +645,8 @@ $code.=<<___; mov %r13, 48(%rsp) mov %r14, 56(%rsp) - -#fifth iteration + +#fifth iteration .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11 adox $out, %r8 adcx %r11, %r9 @@ -704,8 +673,8 @@ $code.=<<___; mov %r15, 64(%rsp) mov %r8, 72(%rsp) - -#sixth iteration + +#sixth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -800,15 +769,24 @@ ___ $code.=<<___; leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lsqr_epilogue: ret +.cfi_endproc .size rsaz_512_sqr,.-rsaz_512_sqr ___ } @@ -819,14 +797,22 @@ $code.=<<___; .type rsaz_512_mul,\@function,5 .align 32 rsaz_512_mul: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_body: movq $out, %xmm0 # off-load arguments movq $mod, %xmm1 @@ -896,15 +882,24 @@ $code.=<<___; call __rsaz_512_subtract leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size rsaz_512_mul,.-rsaz_512_mul ___ } @@ -915,14 +910,22 @@ $code.=<<___; .type rsaz_512_mul_gather4,\@function,6 .align 32 rsaz_512_mul_gather4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$`128+24+($win64?0xb0:0)`, %rsp +.cfi_adjust_cfa_offset `128+24+($win64?0xb0:0)` ___ $code.=<<___ if ($win64); movaps %xmm6,0xa0(%rsp) @@ -1048,7 +1051,7 @@ $code.=<<___; movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1150,7 +1153,7 @@ $code.=<<___; movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi @@ -1212,7 +1215,7 @@ $code.=<<___ if ($addx); mulx 48($ap), %rbx, %r14 adcx %rax, %r12 - + mulx 56($ap), %rax, %r15 adcx %rbx, %r13 adcx %rax, %r14 @@ -1348,15 +1351,24 @@ $code.=<<___ if ($win64); lea 0xb0(%rax),%rax ___ $code.=<<___; +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_gather4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 ___ } @@ -1367,15 +1379,23 @@ $code.=<<___; .type rsaz_512_mul_scatter4,\@function,6 .align 32 rsaz_512_mul_scatter4: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 mov $pwr, $pwr subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_scatter4_body: leaq ($tbl,$pwr,8), $tbl movq $out, %xmm0 # off-load arguments @@ -1411,7 +1431,7 @@ $code.=<<___; ___ $code.=<<___ if ($addx); jmp .Lmul_scatter_tail - + .align 32 .Lmulx_scatter: movq ($out), %rdx # pass b[0] @@ -1458,15 +1478,24 @@ $code.=<<___; movq %r15, 128*7($inp) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_scatter4_epilogue: ret +.cfi_endproc .size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 ___ } @@ -1477,14 +1506,22 @@ $code.=<<___; .type rsaz_512_mul_by_one,\@function,4 .align 32 rsaz_512_mul_by_one: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 subq \$128+24, %rsp +.cfi_adjust_cfa_offset 128+24 .Lmul_by_one_body: ___ $code.=<<___ if ($addx); @@ -1539,15 +1576,24 @@ $code.=<<___; movq %r15, 56($out) leaq 128+24+48(%rsp), %rax +.cfi_def_cfa %rax,8 movq -48(%rax), %r15 +.cfi_restore %r15 movq -40(%rax), %r14 +.cfi_restore %r14 movq -32(%rax), %r13 +.cfi_restore %r13 movq -24(%rax), %r12 +.cfi_restore %r12 movq -16(%rax), %rbp +.cfi_restore %rbp movq -8(%rax), %rbx +.cfi_restore %rbx leaq (%rax), %rsp +.cfi_def_cfa_register %rsp .Lmul_by_one_epilogue: ret +.cfi_endproc .size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one ___ } @@ -1824,7 +1870,7 @@ __rsaz_512_mul: movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1901,7 +1947,7 @@ __rsaz_512_mul: movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl index cbd16f4214..06181bf9b9 100644 --- a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl @@ -20,7 +20,7 @@ # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for # the time being... gcc 4.3 appeared to generate poor code, therefore # the effort. And indeed, the module delivers 55%-90%(*) improvement -# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit +# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit # key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196. # This is for 64-bit build. In 32-bit "highgprs" case improvement is # even higher, for example on z990 it was measured 80%-150%. ECDSA @@ -198,7 +198,7 @@ $code.=<<___; xgr $hi,@r[1] xgr $lo,@r[0] xgr $hi,@r[2] - xgr $lo,@r[3] + xgr $lo,@r[3] xgr $hi,@r[3] xgr $lo,$hi stg $hi,16($rp) diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl index 66780cdf80..c2fc5adffe 100644 --- a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. diff --git a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl index 4f339b2279..fcae9cfc5b 100755 --- a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl @@ -8,9 +8,9 @@ # ==================================================================== -# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov -# <appro@openssl.org>. The module is licensed under 2-clause BSD -# license. November 2012. All rights reserved. +# Written by David S. Miller and Andy Polyakov +# The module is licensed under 2-clause BSD license. +# November 2012. All rights reserved. # ==================================================================== ###################################################################### diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S index 9c31073b24..75d72eb92c 100644 --- a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S +++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S @@ -1,5 +1,5 @@ .ident "sparcv8.s, Version 1.4" -.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "SPARC v8 ISA artwork by Andy Polyakov <appro@openssl.org>" /* * ==================================================================== @@ -13,7 +13,7 @@ */ /* - * This is my modest contributon to OpenSSL project (see + * This is my modest contribution to OpenSSL project (see * http://www.openssl.org/ for more information about it) and is * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c * module. For updates see http://fy.chalmers.se/~appro/hpe/. @@ -159,12 +159,12 @@ bn_mul_add_words: */ bn_mul_words: cmp %o2,0 - bg,a .L_bn_mul_words_proceeed + bg,a .L_bn_mul_words_proceed ld [%o1],%g2 retl clr %o0 -.L_bn_mul_words_proceeed: +.L_bn_mul_words_proceed: andcc %o2,-4,%g0 bz .L_bn_mul_words_tail clr %o5 @@ -251,12 +251,12 @@ bn_mul_words: */ bn_sqr_words: cmp %o2,0 - bg,a .L_bn_sqr_words_proceeed + bg,a .L_bn_sqr_words_proceed ld [%o1],%g2 retl clr %o0 -.L_bn_sqr_words_proceeed: +.L_bn_sqr_words_proceed: andcc %o2,-4,%g0 bz .L_bn_sqr_words_tail clr %o5 diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S index 714a136675..fe4699b2bd 100644 --- a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S +++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S @@ -1,5 +1,5 @@ .ident "sparcv8plus.s, Version 1.4" -.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.ident "SPARC v9 ISA artwork by Andy Polyakov <appro@openssl.org>" /* * ==================================================================== @@ -13,7 +13,7 @@ */ /* - * This is my modest contributon to OpenSSL project (see + * This is my modest contribution to OpenSSL project (see * http://www.openssl.org/ for more information about it) and is * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c * module. For updates see http://fy.chalmers.se/~appro/hpe/. @@ -144,10 +144,6 @@ * } */ -#ifdef OPENSSL_FIPSCANISTER -#include <openssl/fipssyms.h> -#endif - #if defined(__SUNPRO_C) && defined(__sparcv9) /* They've said -xarch=v9 at command line */ .register %g2,#scratch @@ -282,7 +278,7 @@ bn_mul_add_words: */ bn_mul_words: sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_mul_words_proceeed + brgz,a %o2,.L_bn_mul_words_proceed lduw [%o1],%g2 retl clr %o0 @@ -290,7 +286,7 @@ bn_mul_words: nop nop -.L_bn_mul_words_proceeed: +.L_bn_mul_words_proceed: srl %o3,%g0,%o3 ! clruw %o3 andcc %o2,-4,%g0 bz,pn %icc,.L_bn_mul_words_tail @@ -370,7 +366,7 @@ bn_mul_words: */ bn_sqr_words: sra %o2,%g0,%o2 ! signx %o2 - brgz,a %o2,.L_bn_sqr_words_proceeed + brgz,a %o2,.L_bn_sqr_words_proceed lduw [%o1],%g2 retl clr %o0 @@ -378,7 +374,7 @@ bn_sqr_words: nop nop -.L_bn_sqr_words_proceeed: +.L_bn_sqr_words_proceed: andcc %o2,-4,%g0 nop bz,pn %icc,.L_bn_sqr_words_tail diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl index 074f9df14b..b41903af98 100644 --- a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -612,7 +612,7 @@ $code.=<<___; add $tp,8,$tp .type $fname,#function .size $fname,(.-$fname) -.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" +.asciz "Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" .align 32 ___ $code =~ s/\`([^\`]*)\`/eval($1)/gem; diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl index 50b690653f..c8f759df9f 100755 --- a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -865,7 +865,7 @@ $fname: restore .type $fname,#function .size $fname,(.-$fname) -.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>" +.asciz "Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>" .align 32 ___ diff --git a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl index 9d65a146a2..9cf717e841 100644 --- a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl @@ -8,7 +8,7 @@ # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -76,7 +76,7 @@ # dsa 1024 bits 0.001346s 0.001595s 742.7 627.0 # dsa 2048 bits 0.004745s 0.005582s 210.7 179.1 # -# Conclusions: +# Conclusions: # - VIA SDK leaves a *lot* of room for improvement (which this # implementation successfully fills:-); # - 'rep montmul' gives up to >3x performance improvement depending on @@ -91,7 +91,7 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; -&asm_init($ARGV[0],"via-mont.pl"); +&asm_init($ARGV[0]); # int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); $func="bn_mul_mont_padlock"; diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl index ba34b36a81..04833a0c87 100644 --- a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl @@ -16,7 +16,7 @@ # October 2012. # -# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and +# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and # onward. There are three new instructions used here: umulxhi, # addxc[cc] and initializing store. On T3 RSA private key operations # are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl index f464368733..d03efcc750 100644 --- a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl @@ -46,7 +46,7 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; -&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386"); +&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386"); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -152,7 +152,7 @@ $R="mm0"; &xor ($a4,$a2); # a2=a4^a2^a4 &mov (&DWP(5*4,"esp"),$a1); # a1^a4 &xor ($a4,$a1); # a1^a2^a4 - &sar (@i[1],31); # broardcast 30th bit + &sar (@i[1],31); # broadcast 30th bit &and ($lo,$b); &mov (&DWP(6*4,"esp"),$a2); # a2^a4 &and (@i[1],$b); diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl index f1abcc5b4c..7ba2133ac9 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl @@ -8,7 +8,7 @@ # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -39,8 +39,8 @@ require "x86asm.pl"; $output = pop; open STDOUT,">$output"; - -&asm_init($ARGV[0],$0); + +&asm_init($ARGV[0]); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -78,7 +78,7 @@ $frame=32; # size of above frame rounded up to 16n &lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2)) &neg ("edi"); - # minimize cache contention by arraning 2K window between stack + # minimize cache contention by arranging 2K window between stack # pointer and ap argument [np is also position sensitive vector, # but it's assumed to be near ap, as it's allocated at ~same # time]. diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c index 621be33054..31839ba060 100644 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c @@ -14,7 +14,7 @@ /*- * x86_64 BIGNUM accelerator version 0.1, December 2002. * - * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL + * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL * project. * * Rights for redistribution and usage in source and binary forms are @@ -114,7 +114,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG c1 = 0; if (num <= 0) - return (c1); + return c1; while (num & ~3) { mul_add(rp[0], ap[0], w, c1); @@ -136,7 +136,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, return c1; } - return (c1); + return c1; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -144,7 +144,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) BN_ULONG c1 = 0; if (num <= 0) - return (c1); + return c1; while (num & ~3) { mul(rp[0], ap[0], w, c1); @@ -164,7 +164,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) return c1; mul(rp[2], ap[2], w, c1); } - return (c1); + return c1; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -264,7 +264,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) int c = 0; if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; for (;;) { t1 = a[0]; @@ -303,7 +303,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) b += 4; r += 4; } - return (c); + return c; } # endif diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl index d962f62033..0fd6e985d7 100644 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl @@ -54,7 +54,9 @@ $code.=<<___; .type _mul_1x1,\@abi-omnipotent .align 16 _mul_1x1: +.cfi_startproc sub \$128+8,%rsp +.cfi_adjust_cfa_offset 128+8 mov \$-1,$a1 lea ($a,$a),$i0 shr \$3,$a1 @@ -66,7 +68,7 @@ _mul_1x1: sar \$63,$i0 # broadcast 62nd bit lea (,$a1,4),$a4 and $b,$a - sar \$63,$i1 # boardcast 61st bit + sar \$63,$i1 # broadcast 61st bit mov $a,$hi # $a is $lo shl \$63,$lo and $b,$i0 @@ -160,8 +162,10 @@ $code.=<<___; xor $i1,$hi add \$128+8,%rsp +.cfi_adjust_cfa_offset -128-8 ret .Lend_mul_1x1: +.cfi_endproc .size _mul_1x1,.-_mul_1x1 ___ @@ -174,8 +178,10 @@ $code.=<<___; .type bn_GF2m_mul_2x2,\@abi-omnipotent .align 16 bn_GF2m_mul_2x2: - mov OPENSSL_ia32cap_P(%rip),%rax - bt \$33,%rax +.cfi_startproc + mov %rsp,%rax + mov OPENSSL_ia32cap_P(%rip),%r10 + bt \$33,%r10 jnc .Lvanilla_mul_2x2 movq $a1,%xmm0 @@ -210,6 +216,7 @@ $code.=<<___; .align 16 .Lvanilla_mul_2x2: lea -8*17(%rsp),%rsp +.cfi_adjust_cfa_offset 8*17 ___ $code.=<<___ if ($win64); mov `8*17+40`(%rsp),$b0 @@ -218,10 +225,15 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %r14,8*10(%rsp) +.cfi_rel_offset %r14,8*10 mov %r13,8*11(%rsp) +.cfi_rel_offset %r13,8*11 mov %r12,8*12(%rsp) +.cfi_rel_offset %r12,8*12 mov %rbp,8*13(%rsp) +.cfi_rel_offset %rbp,8*13 mov %rbx,8*14(%rsp) +.cfi_rel_offset %rbx,8*14 .Lbody_mul_2x2: mov $rp,32(%rsp) # save the arguments mov $a1,40(%rsp) @@ -269,10 +281,15 @@ $code.=<<___; mov $lo,8(%rbp) mov 8*10(%rsp),%r14 +.cfi_restore %r14 mov 8*11(%rsp),%r13 +.cfi_restore %r13 mov 8*12(%rsp),%r12 +.cfi_restore %r12 mov 8*13(%rsp),%rbp +.cfi_restore %rbp mov 8*14(%rsp),%rbx +.cfi_restore %rbx ___ $code.=<<___ if ($win64); mov 8*15(%rsp),%rdi @@ -280,8 +297,11 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea 8*17(%rsp),%rsp +.cfi_adjust_cfa_offset -8*17 +.Lepilogue_mul_2x2: ret .Lend_mul_2x2: +.cfi_endproc .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 .asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" .align 16 @@ -312,13 +332,19 @@ se_handler: pushfq sub \$64,%rsp - mov 152($context),%rax # pull context->Rsp + mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip lea .Lbody_mul_2x2(%rip),%r10 cmp %r10,%rbx # context->Rip<"prologue" label jb .Lin_prologue + mov 152($context),%rax # pull context->Rsp + + lea .Lepilogue_mul_2x2(%rip),%r10 + cmp %r10,%rbx # context->Rip>="epilogue" label + jae .Lin_prologue + mov 8*10(%rax),%r14 # mimic epilogue mov 8*11(%rax),%r13 mov 8*12(%rax),%r12 @@ -335,8 +361,9 @@ se_handler: mov %r13,224($context) # restore context->R13 mov %r14,232($context) # restore context->R14 -.Lin_prologue: lea 8*17(%rax),%rax + +.Lin_prologue: mov %rax,152($context) # restore context->Rsp mov 40($disp),%rdi # disp->ContextRecord diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl index 8d2fb2cebb..c051135e30 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl @@ -104,8 +104,10 @@ $code=<<___; .type bn_mul_mont,\@function,6 .align 16 bn_mul_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$3,${num}d jnz .Lmul_enter cmp \$8,${num}d @@ -124,11 +126,17 @@ $code.=<<___; .align 16 .Lmul_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -161,6 +169,7 @@ $code.=<<___; .Lmul_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: mov $bp,%r12 # reassign $bp ___ @@ -309,7 +318,7 @@ $code.=<<___; mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8(%rsp,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit @@ -331,16 +340,25 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont,.-bn_mul_mont ___ {{{ @@ -350,8 +368,10 @@ $code.=<<___; .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -361,11 +381,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -389,6 +415,7 @@ $code.=<<___; .Lmul4x_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov %rdx,%r12 # reassign $bp @@ -721,7 +748,7 @@ $code.=<<___; mov 56($ap,$i,8),@ri[3] sbb 40($np,$i,8),@ri[1] lea 4($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub4x mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] @@ -766,16 +793,25 @@ ___ } $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi, 8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont,.-bn_mul4x_mont ___ }}} @@ -803,14 +839,22 @@ $code.=<<___; .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lsqr8x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lsqr8x_prologue: mov ${num}d,%r10d @@ -866,6 +910,7 @@ bn_sqr8x_mont: mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lsqr8x_body: movq $nptr, %xmm2 # save pointer to modulus @@ -935,6 +980,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lsqr8x_cond_copy .align 32 @@ -964,14 +1010,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lsqr8x_epilogue: ret +.cfi_endproc .size bn_sqr8x_mont,.-bn_sqr8x_mont ___ }}} @@ -983,14 +1037,22 @@ $code.=<<___; .type bn_mulx4x_mont,\@function,6 .align 32 bn_mulx4x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -1036,6 +1098,7 @@ bn_mulx4x_mont: mov $n0, 24(%rsp) # save *n0 mov $rp, 32(%rsp) # save $rp mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 mov $num,48(%rsp) # inner counter jmp .Lmulx4x_body @@ -1285,6 +1348,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lmulx4x_cond_copy .align 32 @@ -1314,14 +1378,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont,.-bn_mulx4x_mont ___ }}} @@ -1400,12 +1472,12 @@ sqr_handler: mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label - cmp %r10,%rbx # context->Rip<.Lsqr_body + cmp %r10,%rbx # context->Rip<.Lsqr_prologue jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # body label - cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue + cmp %r10,%rbx # context->Rip<.Lsqr_body jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl index 97d8eee700..ad6e8ada3c 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -93,8 +93,10 @@ $code=<<___; .type bn_mul_mont_gather5,\@function,6 .align 64 bn_mul_mont_gather5: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$7,${num}d jnz .Lmul_enter ___ @@ -108,11 +110,17 @@ $code.=<<___; .Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -145,6 +153,7 @@ $code.=<<___; lea .Linc(%rip),%r10 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: lea 128($bp),%r12 # reassign $bp (+size optimization) @@ -410,7 +419,7 @@ $code.=<<___; mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i] mov 8($ap,$i,8),%rax # tp[i+1] lea 1($i),$i # i++ - dec $j # doesnn't affect CF! + dec $j # doesn't affect CF! jnz .Lsub sbb \$0,%rax # handle upmost overflow bit @@ -432,17 +441,26 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 ___ {{{ @@ -452,8 +470,10 @@ $code.=<<___; .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -463,11 +483,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmul4x_prologue: .byte 0x67 @@ -523,22 +549,32 @@ $code.=<<___; neg $num mov %rax,40(%rsp) +.cfi_cfa_expression %rsp+40,deref,+8 .Lmul4x_body: call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .type mul4x_internal,\@abi-omnipotent @@ -1050,7 +1086,7 @@ my $bptr="%rdx"; # const void *table, my $nptr="%rcx"; # const BN_ULONG *nptr, my $n0 ="%r8"; # const BN_ULONG *n0); my $num ="%r9"; # int num, has to be divisible by 8 - # int pwr + # int pwr my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); my @A0=("%r10","%r11"); @@ -1062,7 +1098,9 @@ $code.=<<___; .type bn_power5,\@function,6 .align 32 bn_power5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d @@ -1072,11 +1110,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpower5_prologue: shl \$3,${num}d # convert $num to bytes @@ -1127,7 +1171,7 @@ $code.=<<___; ja .Lpwr_page_walk .Lpwr_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -1141,6 +1185,7 @@ $code.=<<___; # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpower5_body: movq $rptr,%xmm1 # save $rptr, used in sqr8x movq $nptr,%xmm2 # save $nptr @@ -1167,16 +1212,25 @@ $code.=<<___; call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpower5_epilogue: ret +.cfi_endproc .size bn_power5,.-bn_power5 .globl bn_sqr8x_internal @@ -2036,7 +2090,7 @@ __bn_post4x_internal: jnz .Lsqr4x_sub mov $num,%r10 # prepare for back-to-back call - neg $num # restore $num + neg $num # restore $num ret .size __bn_post4x_internal,.-__bn_post4x_internal ___ @@ -2056,14 +2110,22 @@ bn_from_montgomery: .type bn_from_mont8x,\@function,6 .align 32 bn_from_mont8x: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lfrom_prologue: shl \$3,${num}d # convert $num to bytes @@ -2128,6 +2190,7 @@ bn_from_mont8x: # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lfrom_body: mov $num,%r11 lea 48(%rsp),%rax @@ -2171,7 +2234,6 @@ $code.=<<___ if ($addx); pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 @@ -2183,11 +2245,12 @@ $code.=<<___; pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 .Lfrom_mont_zero: + mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 movdqa %xmm0,16*0(%rax) movdqa %xmm0,16*1(%rax) movdqa %xmm0,16*2(%rax) @@ -2198,14 +2261,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lfrom_epilogue: ret +.cfi_endproc .size bn_from_mont8x,.-bn_from_mont8x ___ } @@ -2218,14 +2289,22 @@ $code.=<<___; .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -2259,7 +2338,7 @@ bn_mulx4x_mont_gather5: mov \$0,%r10 cmovc %r10,%r11 sub %r11,%rbp -.Lmulx4xsp_done: +.Lmulx4xsp_done: and \$-64,%rbp # ensure alignment mov %rsp,%r11 sub %rbp,%r11 @@ -2291,21 +2370,31 @@ bn_mulx4x_mont_gather5: # mov $n0, 32(%rsp) # save *n0 mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lmulx4x_body: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 .type mulx4x_internal,\@abi-omnipotent @@ -2333,7 +2422,7 @@ my $N=$STRIDE/4; # should match cache line size $code.=<<___; movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000 movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002 - lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton) + lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimization) lea 128($bp),$bptr # size optimization pshufd \$0,%xmm5,%xmm5 # broadcast index @@ -2683,14 +2772,22 @@ $code.=<<___; .type bn_powerx5,\@function,6 .align 32 bn_powerx5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lpowerx5_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpowerx5_prologue: shl \$3,${num}d # convert $num to bytes @@ -2741,7 +2838,7 @@ bn_powerx5: ja .Lpwrx_page_walk .Lpwrx_page_walk_done: - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -2762,6 +2859,7 @@ bn_powerx5: movq $bptr,%xmm4 mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpowerx5_body: call __bn_sqrx8x_internal @@ -2784,17 +2882,26 @@ bn_powerx5: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpowerx5_epilogue: ret +.cfi_endproc .size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal @@ -3678,8 +3785,8 @@ mul_handler: jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label + lea (%rsi,%r10),%r10 # beginning of body label + cmp %r10,%rbx # context->Rip<body label jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp diff --git a/deps/openssl/openssl/crypto/bn/bn_add.c b/deps/openssl/openssl/crypto/bn/bn_add.c index 7cdefa77a1..f2736b8f6d 100644 --- a/deps/openssl/openssl/crypto/bn/bn_add.c +++ b/deps/openssl/openssl/crypto/bn/bn_add.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -10,51 +10,69 @@ #include "internal/cryptlib.h" #include "bn_lcl.h" -/* r can == a or b */ +/* signed add of b to a. */ int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { - int a_neg = a->neg, ret; + int ret, r_neg, cmp_res; bn_check_top(a); bn_check_top(b); - /*- - * a + b a+b - * a + -b a-b - * -a + b b-a - * -a + -b -(a+b) - */ - if (a_neg ^ b->neg) { - /* only one is negative */ - if (a_neg) { - const BIGNUM *tmp; - - tmp = a; - a = b; - b = tmp; + if (a->neg == b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = b->neg; + ret = BN_usub(r, b, a); + } else { + r_neg = 0; + BN_zero(r); + ret = 1; } + } + + r->neg = r_neg; + bn_check_top(r); + return ret; +} + +/* signed sub of b from a. */ +int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int ret, r_neg, cmp_res; - /* we are now a - b */ + bn_check_top(a); + bn_check_top(b); - if (BN_ucmp(a, b) < 0) { - if (!BN_usub(r, b, a)) - return 0; - r->neg = 1; + if (a->neg != b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = !b->neg; + ret = BN_usub(r, b, a); } else { - if (!BN_usub(r, a, b)) - return 0; - r->neg = 0; + r_neg = 0; + BN_zero(r); + ret = 1; } - return 1; } - ret = BN_uadd(r, a, b); - r->neg = a_neg; + r->neg = r_neg; bn_check_top(r); return ret; } -/* unsigned add of b to a */ +/* unsigned add of b to a, r can be equal to a or b. */ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) { int max, min, dif; @@ -151,59 +169,3 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) return 1; } -int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) -{ - int max; - int add = 0, neg = 0; - - bn_check_top(a); - bn_check_top(b); - - /*- - * a - b a-b - * a - -b a+b - * -a - b -(a+b) - * -a - -b b-a - */ - if (a->neg) { - if (b->neg) { - const BIGNUM *tmp; - - tmp = a; - a = b; - b = tmp; - } else { - add = 1; - neg = 1; - } - } else { - if (b->neg) { - add = 1; - neg = 0; - } - } - - if (add) { - if (!BN_uadd(r, a, b)) - return 0; - r->neg = neg; - return 1; - } - - /* We are actually doing a - b :-) */ - - max = (a->top > b->top) ? a->top : b->top; - if (bn_wexpand(r, max) == NULL) - return 0; - if (BN_ucmp(a, b) < 0) { - if (!BN_usub(r, b, a)) - return 0; - r->neg = 1; - } else { - if (!BN_usub(r, a, b)) - return 0; - r->neg = 0; - } - bn_check_top(r); - return 1; -} diff --git a/deps/openssl/openssl/crypto/bn/bn_asm.c b/deps/openssl/openssl/crypto/bn/bn_asm.c index 39c6c2134b..729b2480ac 100644 --- a/deps/openssl/openssl/crypto/bn/bn_asm.c +++ b/deps/openssl/openssl/crypto/bn/bn_asm.c @@ -21,7 +21,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, assert(num >= 0); if (num <= 0) - return (c1); + return c1; # ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { @@ -41,7 +41,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, num--; } - return (c1); + return c1; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -50,7 +50,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) - return (c1); + return c1; # ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { @@ -69,7 +69,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) rp++; num--; } - return (c1); + return c1; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -108,7 +108,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, assert(num >= 0); if (num <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; bl = LBITS(w); bh = HBITS(w); @@ -130,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, rp++; num--; } - return (c); + return c; } BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) @@ -140,7 +140,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) assert(num >= 0); if (num <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; bl = LBITS(w); bh = HBITS(w); @@ -162,7 +162,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) rp++; num--; } - return (carry); + return carry; } void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) @@ -210,7 +210,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) int i, count = 2; if (d == 0) - return (BN_MASK2); + return BN_MASK2; i = BN_num_bits_word(d); assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); @@ -264,7 +264,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) l = (l & BN_MASK2l) << BN_BITS4; } ret |= q; - return (ret); + return ret; } #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ @@ -276,7 +276,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; # ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { @@ -307,7 +307,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return ((BN_ULONG)ll); + return (BN_ULONG)ll; } #else /* !BN_LLONG */ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, @@ -317,7 +317,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; c = 0; # ifndef OPENSSL_SMALL_FOOTPRINT @@ -364,7 +364,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return ((BN_ULONG)c); + return (BN_ULONG)c; } #endif /* !BN_LLONG */ @@ -376,7 +376,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, assert(n >= 0); if (n <= 0) - return ((BN_ULONG)0); + return (BN_ULONG)0; #ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { @@ -417,7 +417,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, r++; n--; } - return (c); + return c; } #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) diff --git a/deps/openssl/openssl/crypto/bn/bn_blind.c b/deps/openssl/openssl/crypto/bn/bn_blind.c index 9474e21e4c..450cdfb348 100644 --- a/deps/openssl/openssl/crypto/bn/bn_blind.c +++ b/deps/openssl/openssl/crypto/bn/bn_blind.c @@ -82,7 +82,6 @@ void BN_BLINDING_free(BN_BLINDING *r) { if (r == NULL) return; - BN_free(r->A); BN_free(r->Ai); BN_free(r->e); @@ -124,7 +123,7 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx) err: if (b->counter == BN_BLINDING_COUNTER) b->counter = 0; - return (ret); + return ret; } int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx) @@ -140,14 +139,14 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx) if ((b->A == NULL) || (b->Ai == NULL)) { BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED); - return (0); + return 0; } if (b->counter == -1) /* Fresh blinding, doesn't need updating. */ b->counter = 0; else if (!BN_BLINDING_update(b, ctx)) - return (0); + return 0; if (r != NULL && (BN_copy(r, b->Ai) == NULL)) return 0; @@ -198,7 +197,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, } bn_check_top(n); - return (ret); + return ret; } int BN_BLINDING_is_current_thread(BN_BLINDING *b) @@ -271,7 +270,7 @@ BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, do { int rv; - if (!BN_rand_range(ret->A, ret->mod)) + if (!BN_priv_rand_range(ret->A, ret->mod)) goto err; if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv)) break; diff --git a/deps/openssl/openssl/crypto/bn/bn_ctx.c b/deps/openssl/openssl/crypto/bn/bn_ctx.c index 68c0468743..aa08b31a34 100644 --- a/deps/openssl/openssl/crypto/bn/bn_ctx.c +++ b/deps/openssl/openssl/crypto/bn/bn_ctx.c @@ -1,5 +1,5 @@ /* - * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -255,9 +255,12 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx) /* Need to expand */ unsigned int newsize = st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES; - unsigned int *newitems = OPENSSL_malloc(sizeof(*newitems) * newsize); - if (newitems == NULL) + unsigned int *newitems; + + if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) { + BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE); return 0; + } if (st->depth) memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth); OPENSSL_free(st->indexes); @@ -306,9 +309,12 @@ static BIGNUM *BN_POOL_get(BN_POOL *p, int flag) /* Full; allocate a new pool item and link it in. */ if (p->used == p->size) { - BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(*item)); - if (item == NULL) + BN_POOL_ITEM *item; + + if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) { + BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE); return NULL; + } for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) { bn_init(bn); if ((flag & BN_FLG_SECURE) != 0) diff --git a/deps/openssl/openssl/crypto/bn/bn_dh.c b/deps/openssl/openssl/crypto/bn/bn_dh.c index 17d05597b3..38acdee234 100644 --- a/deps/openssl/openssl/crypto/bn/bn_dh.c +++ b/deps/openssl/openssl/crypto/bn/bn_dh.c @@ -1,5 +1,5 @@ /* - * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -8,7 +8,7 @@ */ #include "bn_lcl.h" -#include "e_os.h" +#include "internal/nelem.h" #ifndef OPENSSL_NO_DH #include <openssl/dh.h> @@ -104,6 +104,146 @@ static const BN_ULONG dh2048_256_q[] = { 0x8CF83642A709A097ULL }; +/* Primes from RFC 7919 */ +static const BN_ULONG ffdhe2048_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL, + 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, + 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, + 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, + 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, + 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, + 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, + 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, + 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, + 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, + 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe3072_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL, + 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, + 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, + 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, + 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, + 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, + 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, + 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, + 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, + 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, + 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, + 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, + 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, + 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, + 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, + 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe4096_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL, + 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, + 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, + 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, + 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, + 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, + 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, + 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, + 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, + 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, + 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, + 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, + 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, + 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, + 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, + 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, + 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, + 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, + 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, + 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, + 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, + 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe6144_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL, + 0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, + 0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, + 0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, + 0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, + 0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, + 0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, + 0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, + 0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, + 0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, + 0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, + 0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, + 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, + 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, + 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, + 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, + 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, + 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, + 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, + 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, + 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, + 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, + 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, + 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, + 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, + 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, + 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, + 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, + 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, + 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, + 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, + 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + +static const BN_ULONG ffdhe8192_p[] = { + 0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL, + 0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL, + 0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL, + 0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL, + 0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL, + 0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL, + 0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL, + 0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL, + 0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL, + 0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL, + 0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL, + 0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL, + 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL, + 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL, + 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL, + 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL, + 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL, + 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL, + 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL, + 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL, + 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL, + 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL, + 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, + 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, + 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, + 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, + 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, + 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, + 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, + 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, + 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, + 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, + 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, + 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, + 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, + 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, + 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, + 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, + 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, + 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, + 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, + 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, + 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL +}; + # elif BN_BITS2 == 32 static const BN_ULONG dh1024_160_p[] = { @@ -194,6 +334,147 @@ static const BN_ULONG dh2048_256_q[] = { 0xA709A097, 0x8CF83642 }; +/* Primes from RFC 7919 */ + +static const BN_ULONG ffdhe2048_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, + 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, + 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, + 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, + 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, + 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, + 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, + 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, + 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, + 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, + 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe3072_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, + 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, + 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, + 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, + 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, + 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, + 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, + 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, + 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, + 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, + 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, + 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, + 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, + 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, + 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, + 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe4096_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851, + 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, + 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, + 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, + 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, + 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, + 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, + 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, + 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, + 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, + 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, + 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, + 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, + 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, + 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, + 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, + 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, + 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, + 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, + 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, + 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, + 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe6144_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D, + 0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, + 0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, + 0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, + 0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, + 0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, + 0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, + 0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, + 0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, + 0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, + 0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, + 0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, + 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, + 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, + 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, + 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, + 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, + 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, + 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, + 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, + 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, + 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, + 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, + 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, + 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, + 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, + 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, + 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, + 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, + 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, + 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, + 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + +static const BN_ULONG ffdhe8192_p[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94, + 0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5, + 0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB, + 0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A, + 0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD, + 0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C, + 0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C, + 0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7, + 0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74, + 0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69, + 0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C, + 0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526, + 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810, + 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D, + 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B, + 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023, + 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA, + 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092, + 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB, + 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33, + 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442, + 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E, + 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, + 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, + 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, + 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, + 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, + 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, + 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, + 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, + 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, + 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, + 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, + 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, + 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, + 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, + 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, + 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, + 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, + 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, + 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, + 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, + 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF +}; + # else # error "unsupported BN_BITS2" # endif @@ -206,6 +487,10 @@ static const BN_ULONG dh2048_256_q[] = { OSSL_NELEM(x),\ 0, BN_FLG_STATIC_DATA }; +static const BN_ULONG value_2 = 2; + +const BIGNUM _bignum_const_2 = + { (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA }; make_dh_bn(dh1024_160_p) make_dh_bn(dh1024_160_g) @@ -217,4 +502,11 @@ make_dh_bn(dh2048_256_p) make_dh_bn(dh2048_256_g) make_dh_bn(dh2048_256_q) +make_dh_bn(ffdhe2048_p) +make_dh_bn(ffdhe3072_p) +make_dh_bn(ffdhe4096_p) +make_dh_bn(ffdhe6144_p) +make_dh_bn(ffdhe8192_p) + + #endif diff --git a/deps/openssl/openssl/crypto/bn/bn_div.c b/deps/openssl/openssl/crypto/bn/bn_div.c index 884ff29917..70add10c7d 100644 --- a/deps/openssl/openssl/crypto/bn/bn_div.c +++ b/deps/openssl/openssl/crypto/bn/bn_div.c @@ -24,17 +24,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, bn_check_top(d); if (BN_is_zero(d)) { BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return (0); + return 0; } if (BN_ucmp(m, d) < 0) { if (rem != NULL) { if (BN_copy(rem, m) == NULL) - return (0); + return 0; } if (dv != NULL) BN_zero(dv); - return (1); + return 1; } BN_CTX_start(ctx); @@ -81,7 +81,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, ret = 1; end: BN_CTX_end(ctx); - return (ret); + return ret; } #else @@ -97,8 +97,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, * understand why...); * - divl doesn't only calculate quotient, but also leaves * remainder in %edx which we can definitely use here:-) - * - * <appro@fy.chalmers.se> */ # undef bn_div_words # define bn_div_words(n0,n1,d0) \ @@ -113,7 +111,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, # elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) /* * Same story here, but it's 128-bit by 64-bit division. Wow! - * <appro@fy.chalmers.se> */ # undef bn_div_words # define bn_div_words(n0,n1,d0) \ @@ -177,28 +174,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (BN_is_zero(divisor)) { BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO); - return (0); + return 0; } if (!no_branch && BN_ucmp(num, divisor) < 0) { if (rm != NULL) { if (BN_copy(rm, num) == NULL) - return (0); + return 0; } if (dv != NULL) BN_zero(dv); - return (1); + return 1; } BN_CTX_start(ctx); + res = (dv == NULL) ? BN_CTX_get(ctx) : dv; tmp = BN_CTX_get(ctx); snum = BN_CTX_get(ctx); sdiv = BN_CTX_get(ctx); - if (dv == NULL) - res = BN_CTX_get(ctx); - else - res = dv; - if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) + if (sdiv == NULL) goto err; /* First we normalise the numbers */ @@ -415,10 +409,10 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, if (no_branch) bn_correct_top(res); BN_CTX_end(ctx); - return (1); + return 1; err: bn_check_top(rm); BN_CTX_end(ctx); - return (0); + return 0; } #endif diff --git a/deps/openssl/openssl/crypto/bn/bn_err.c b/deps/openssl/openssl/crypto/bn/bn_err.c index 5fe9db9ede..dd87c152cf 100644 --- a/deps/openssl/openssl/crypto/bn/bn_err.c +++ b/deps/openssl/openssl/crypto/bn/bn_err.c @@ -1,6 +1,6 @@ /* * Generated by util/mkerr.pl DO NOT EDIT - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -8,87 +8,99 @@ * https://www.openssl.org/source/license.html */ -#include <stdio.h> #include <openssl/err.h> -#include <openssl/bn.h> +#include <openssl/bnerr.h> -/* BEGIN ERROR CODES */ #ifndef OPENSSL_NO_ERR -# define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0) -# define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason) - -static ERR_STRING_DATA BN_str_functs[] = { - {ERR_FUNC(BN_F_BNRAND), "bnrand"}, - {ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"}, - {ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"}, - {ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"}, - {ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"}, - {ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"}, - {ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"}, - {ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"}, - {ERR_FUNC(BN_F_BN_COMPUTE_WNAF), "bn_compute_wNAF"}, - {ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"}, - {ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"}, - {ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"}, - {ERR_FUNC(BN_F_BN_DIV), "BN_div"}, - {ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"}, - {ERR_FUNC(BN_F_BN_EXP), "BN_exp"}, - {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "bn_expand_internal"}, - {ERR_FUNC(BN_F_BN_GENCB_NEW), "BN_GENCB_new"}, - {ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"}, - {ERR_FUNC(BN_F_BN_GENERATE_PRIME_EX), "BN_generate_prime_ex"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"}, - {ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"}, - {ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"}, - {ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"}, - {ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"}, - {ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"}, - {ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"}, - {ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"}, - {ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"}, - {ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"}, - {ERR_FUNC(BN_F_BN_NEW), "BN_new"}, - {ERR_FUNC(BN_F_BN_RAND), "BN_rand"}, - {ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"}, - {ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"}, - {ERR_FUNC(BN_F_BN_SET_WORDS), "bn_set_words"}, - {ERR_FUNC(BN_F_BN_USUB), "BN_usub"}, +static const ERR_STRING_DATA BN_str_functs[] = { + {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0), + "BN_BLINDING_convert_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0), + "BN_BLINDING_create_param"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0), + "BN_BLINDING_invert_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0), + "BN_generate_dsa_nonce"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0), + "BN_generate_prime_ex"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0), + "BN_GF2m_mod_solve_quad"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0), + "BN_GF2m_mod_solve_quad_arr"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0), + "BN_mod_exp_mont_consttime"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0), + "BN_mod_exp_mont_word"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0), + "BN_mod_inverse_no_branch"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"}, + {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"}, {0, NULL} }; -static ERR_STRING_DATA BN_str_reasons[] = { - {ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"}, - {ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"}, - {ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"}, - {ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"}, - {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"}, - {ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"}, - {ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"}, - {ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA), - "expand on static bignum data"}, - {ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"}, - {ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"}, - {ERR_REASON(BN_R_INVALID_RANGE), "invalid range"}, - {ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"}, - {ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"}, - {ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"}, - {ERR_REASON(BN_R_NO_INVERSE), "no inverse"}, - {ERR_REASON(BN_R_NO_SOLUTION), "no solution"}, - {ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE), "private key too large"}, - {ERR_REASON(BN_R_P_IS_NOT_PRIME), "p is not prime"}, - {ERR_REASON(BN_R_TOO_MANY_ITERATIONS), "too many iterations"}, - {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES), - "too many temporary variables"}, +static const ERR_STRING_DATA BN_str_reasons[] = { + {ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS), + "called with even modulus"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA), + "expand on static bignum data"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE), + "private key too large"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"}, + {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES), + "too many temporary variables"}, {0, NULL} }; @@ -97,10 +109,9 @@ static ERR_STRING_DATA BN_str_reasons[] = { int ERR_load_BN_strings(void) { #ifndef OPENSSL_NO_ERR - if (ERR_func_error_string(BN_str_functs[0].error) == NULL) { - ERR_load_strings(0, BN_str_functs); - ERR_load_strings(0, BN_str_reasons); + ERR_load_strings_const(BN_str_functs); + ERR_load_strings_const(BN_str_reasons); } #endif return 1; diff --git a/deps/openssl/openssl/crypto/bn/bn_exp.c b/deps/openssl/openssl/crypto/bn/bn_exp.c index a6ad475a0b..c026ffcb33 100644 --- a/deps/openssl/openssl/crypto/bn/bn_exp.c +++ b/deps/openssl/openssl/crypto/bn/bn_exp.c @@ -51,10 +51,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } BN_CTX_start(ctx); - if ((r == a) || (r == p)) - rr = BN_CTX_get(ctx); - else - rr = r; + rr = ((r == a) || (r == p)) ? BN_CTX_get(ctx) : r; v = BN_CTX_get(ctx); if (rr == NULL || v == NULL) goto err; @@ -86,7 +83,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, @@ -134,13 +131,6 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, #define RECP_MUL_MOD #ifdef MONT_MUL_MOD - /* - * I have finally been able to take out this pre-condition of the top bit - * being set. It was caused by an error in BN_div with negatives. There - * was also another problem when for a^b%m a >= m. eay 07-May-97 - */ - /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ - if (BN_is_odd(m)) { # ifdef MONT_EXP_WORD if (a->top == 1 && !a->neg @@ -165,7 +155,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, #endif bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, @@ -201,7 +191,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_start(ctx); aa = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!aa || !val[0]) + if (val[0] == NULL) goto err; BN_RECP_CTX_init(&recp); @@ -300,7 +290,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_end(ctx); BN_RECP_CTX_free(&recp); bn_check_top(r); - return (ret); + return ret; } int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, @@ -326,7 +316,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } bits = BN_num_bits(p); if (bits == 0) { @@ -344,7 +334,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!d || !r || !val[0]) + if (val[0] == NULL) goto err; /* @@ -366,11 +356,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, aa = val[0]; } else aa = a; - if (BN_is_zero(aa)) { - BN_zero(rr); - ret = 1; - goto err; - } if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx)) goto err; /* 1 */ @@ -481,10 +466,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } -#if defined(SPARC_T4_MONT) static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { BN_ULONG ret = 0; @@ -503,7 +487,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) return ret & BN_MASK2; } -#endif /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific @@ -610,7 +593,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i, bits, ret = 0, window, wvalue; + int i, bits, ret = 0, window, wvalue, wmask, window0; int top; BN_MONT_CTX *mont = NULL; @@ -629,7 +612,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } top = m->top; @@ -666,31 +649,33 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } #ifdef RSAZ_ENABLED - /* - * If the size of the operands allow it, perform the optimized - * RSAZ exponentiation. For further information see - * crypto/bn/rsaz_exp.c and accompanying assembly modules. - */ - if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) - && rsaz_avx2_eligible()) { - if (NULL == bn_wexpand(rr, 16)) + if (!a->neg) { + /* + * If the size of the operands allow it, perform the optimized + * RSAZ exponentiation. For further information see + * crypto/bn/rsaz_exp.c and accompanying assembly modules. + */ + if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) + && rsaz_avx2_eligible()) { + if (NULL == bn_wexpand(rr, 16)) + goto err; + RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, + mont->n0[0]); + rr->top = 16; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; goto err; - RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, - mont->n0[0]); - rr->top = 16; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; - goto err; - } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { - if (NULL == bn_wexpand(rr, 8)) + } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { + if (NULL == bn_wexpand(rr, 8)) + goto err; + RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); + rr->top = 8; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; goto err; - RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); - rr->top = 8; - rr->neg = 0; - bn_correct_top(rr); - ret = 1; - goto err; + } } #endif @@ -763,7 +748,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a, m) >= 0) { - if (!BN_mod(&am, a, m, ctx)) + if (!BN_nnmod(&am, a, m, ctx)) goto err; if (!bn_to_mont_fixed_top(&am, &am, mont, ctx)) goto err; @@ -861,20 +846,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, top /= 2; bn_flip_t4(np, mont->N.d, top); - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5_t4(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { + while (bits > 0) { if (bits < stride) - stride = bits + 1; + stride = bits; bits -= stride; - wvalue = bn_get_bits(p, bits + 1); + wvalue = bn_get_bits(p, bits); if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) continue; @@ -982,32 +974,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bn_scatter5(tmp.d, top, powerbuf, i); } # endif - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - if (top & 7) - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - + if (top & 7) { + while (bits > 0) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, - wvalue); + bn_get_bits5(p->d, bits -= 5)); + } } else { - while (bits >= 0) { - wvalue = bn_get_bits5(p->d, bits - 4); - bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + while (bits > 0) { + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, + bn_get_bits5(p->d, bits -= 5)); } } @@ -1049,28 +1045,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } - bits--; - for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % window + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, window)) goto err; + wmask = (1 << window) - 1; /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - wvalue = 0; /* The 'value' of the window */ + while (bits > 0) { - /* Scan the window, squaring the result as we go */ - for (i = 0; i < window; i++, bits--) { + /* Square the result window-size times */ + for (i = 0; i < window; i++) if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx)) goto err; - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - } /* + * Get a window's worth of bits from the exponent + * This avoids calling BN_is_bit_set for each bit, which + * is not only slower but also makes each bit vulnerable to + * EM (and likely other) side-channel attacks like One&Done + * (for details see "One&Done: A Single-Decryption EM-Based + * Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam, + * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and + * M. Prvulovic, in USENIX Security'18) + */ + bits -= window; + wvalue = bn_get_bits(p, bits) & wmask; + /* * Fetch the appropriate pre-computed value from the pre-buf */ if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, @@ -1108,7 +1121,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, OPENSSL_free(powerbufFree); } BN_CTX_end(ctx); - return (ret); + return ret; } int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, @@ -1118,7 +1131,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, int b, bits, ret = 0; int r_is_one; BN_ULONG w, next_w; - BIGNUM *d, *r, *t; + BIGNUM *r, *t; BIGNUM *swap_tmp; #define BN_MOD_MUL_WORD(r, w, m) \ (BN_mul_word(r, (w)) && \ @@ -1149,7 +1162,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } if (m->top == 1) a %= m->d[0]; /* make sure that 'a' is reduced */ @@ -1172,10 +1185,9 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, } BN_CTX_start(ctx); - d = BN_CTX_get(ctx); r = BN_CTX_get(ctx); t = BN_CTX_get(ctx); - if (d == NULL || r == NULL || t == NULL) + if (t == NULL) goto err; if (in_mont != NULL) @@ -1256,7 +1268,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } /* The old fallback, simple version :-) */ @@ -1292,7 +1304,7 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX_start(ctx); d = BN_CTX_get(ctx); val[0] = BN_CTX_get(ctx); - if (!d || !val[0]) + if (val[0] == NULL) goto err; if (!BN_nnmod(val[0], a, m, ctx)) @@ -1377,5 +1389,5 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } diff --git a/deps/openssl/openssl/crypto/bn/bn_exp2.c b/deps/openssl/openssl/crypto/bn/bn_exp2.c index 5141c21f6d..082c9286a0 100644 --- a/deps/openssl/openssl/crypto/bn/bn_exp2.c +++ b/deps/openssl/openssl/crypto/bn/bn_exp2.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -34,7 +34,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, if (!(m->d[0] & 1)) { BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); - return (0); + return 0; } bits1 = BN_num_bits(p1); bits2 = BN_num_bits(p2); @@ -50,7 +50,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, r = BN_CTX_get(ctx); val1[0] = BN_CTX_get(ctx); val2[0] = BN_CTX_get(ctx); - if (!d || !r || !val1[0] || !val2[0]) + if (val2[0] == NULL) goto err; if (in_mont != NULL) @@ -197,5 +197,5 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, BN_MONT_CTX_free(mont); BN_CTX_end(ctx); bn_check_top(rr); - return (ret); + return ret; } diff --git a/deps/openssl/openssl/crypto/bn/bn_gcd.c b/deps/openssl/openssl/crypto/bn/bn_gcd.c index bed231c8fa..0091ea4e08 100644 --- a/deps/openssl/openssl/crypto/bn/bn_gcd.c +++ b/deps/openssl/openssl/crypto/bn/bn_gcd.c @@ -23,7 +23,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) BN_CTX_start(ctx); a = BN_CTX_get(ctx); b = BN_CTX_get(ctx); - if (a == NULL || b == NULL) + if (b == NULL) goto err; if (BN_copy(a, in_a) == NULL) @@ -48,7 +48,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx) err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) @@ -111,9 +111,9 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b) goto err; } bn_check_top(a); - return (a); + return a; err: - return (NULL); + return NULL; } /* solves ax == 1 (mod n) */ @@ -448,7 +448,7 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in, BN_free(R); BN_CTX_end(ctx); bn_check_top(ret); - return (ret); + return ret; } /* @@ -619,5 +619,5 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, BN_free(R); BN_CTX_end(ctx); bn_check_top(ret); - return (ret); + return ret; } diff --git a/deps/openssl/openssl/crypto/bn/bn_gf2m.c b/deps/openssl/openssl/crypto/bn/bn_gf2m.c index d80f3ec940..34d8b69c1e 100644 --- a/deps/openssl/openssl/crypto/bn/bn_gf2m.c +++ b/deps/openssl/openssl/crypto/bn/bn_gf2m.c @@ -1,5 +1,6 @@ /* * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -7,17 +8,6 @@ * https://www.openssl.org/source/license.html */ -/* ==================================================================== - * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. - * - * The Elliptic Curve Public-Key Crypto Library (ECC Code) included - * herein is developed by SUN MICROSYSTEMS, INC., and is contributed - * to the OpenSSL project. - * - * The ECC Code is licensed pursuant to the OpenSSL open source - * license provided below. - */ - #include <assert.h> #include <limits.h> #include <stdio.h> @@ -559,7 +549,8 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) * Hernandez, J.L., and Menezes, A. "Software Implementation of Elliptic * Curve Cryptography Over Binary Fields". */ -int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) +static int BN_GF2m_mod_inv_vartime(BIGNUM *r, const BIGNUM *a, + const BIGNUM *p, BN_CTX *ctx) { BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp; int ret = 0; @@ -569,13 +560,11 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) BN_CTX_start(ctx); - if ((b = BN_CTX_get(ctx)) == NULL) - goto err; - if ((c = BN_CTX_get(ctx)) == NULL) - goto err; - if ((u = BN_CTX_get(ctx)) == NULL) - goto err; - if ((v = BN_CTX_get(ctx)) == NULL) + b = BN_CTX_get(ctx); + c = BN_CTX_get(ctx); + u = BN_CTX_get(ctx); + v = BN_CTX_get(ctx); + if (v == NULL) goto err; if (!BN_GF2m_mod(u, a, p)) @@ -727,6 +716,46 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) return ret; } +/*- + * Wrapper for BN_GF2m_mod_inv_vartime that blinds the input before calling. + * This is not constant time. + * But it does eliminate first order deduction on the input. + */ +int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) +{ + BIGNUM *b = NULL; + int ret = 0; + + BN_CTX_start(ctx); + if ((b = BN_CTX_get(ctx)) == NULL) + goto err; + + /* generate blinding value */ + do { + if (!BN_priv_rand(b, BN_num_bits(p) - 1, + BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) + goto err; + } while (BN_is_zero(b)); + + /* r := a * b */ + if (!BN_GF2m_mod_mul(r, a, b, p, ctx)) + goto err; + + /* r := 1/(a * b) */ + if (!BN_GF2m_mod_inv_vartime(r, r, p, ctx)) + goto err; + + /* r := b/(a * b) = 1/a */ + if (!BN_GF2m_mod_mul(r, r, b, p, ctx)) + goto err; + + ret = 1; + + err: + BN_CTX_end(ctx); + return ret; +} + /* * Invert xx, reduce modulo p, and store the result in r. r could be xx. * This function calls down to the BN_GF2m_mod_inv implementation; this @@ -754,7 +783,6 @@ int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], return ret; } -# ifndef OPENSSL_SUN_GF2M_DIV /* * Divide y by x, reduce modulo p, and store the result in r. r could be x * or y, x could equal y. @@ -785,94 +813,6 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, BN_CTX_end(ctx); return ret; } -# else -/* - * Divide y by x, reduce modulo p, and store the result in r. r could be x - * or y, x could equal y. Uses algorithm Modular_Division_GF(2^m) from - * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to the - * Great Divide". - */ -int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, - const BIGNUM *p, BN_CTX *ctx) -{ - BIGNUM *a, *b, *u, *v; - int ret = 0; - - bn_check_top(y); - bn_check_top(x); - bn_check_top(p); - - BN_CTX_start(ctx); - - a = BN_CTX_get(ctx); - b = BN_CTX_get(ctx); - u = BN_CTX_get(ctx); - v = BN_CTX_get(ctx); - if (v == NULL) - goto err; - - /* reduce x and y mod p */ - if (!BN_GF2m_mod(u, y, p)) - goto err; - if (!BN_GF2m_mod(a, x, p)) - goto err; - if (!BN_copy(b, p)) - goto err; - - while (!BN_is_odd(a)) { - if (!BN_rshift1(a, a)) - goto err; - if (BN_is_odd(u)) - if (!BN_GF2m_add(u, u, p)) - goto err; - if (!BN_rshift1(u, u)) - goto err; - } - - do { - if (BN_GF2m_cmp(b, a) > 0) { - if (!BN_GF2m_add(b, b, a)) - goto err; - if (!BN_GF2m_add(v, v, u)) - goto err; - do { - if (!BN_rshift1(b, b)) - goto err; - if (BN_is_odd(v)) - if (!BN_GF2m_add(v, v, p)) - goto err; - if (!BN_rshift1(v, v)) - goto err; - } while (!BN_is_odd(b)); - } else if (BN_abs_is_word(a, 1)) - break; - else { - if (!BN_GF2m_add(a, a, b)) - goto err; - if (!BN_GF2m_add(u, u, v)) - goto err; - do { - if (!BN_rshift1(a, a)) - goto err; - if (BN_is_odd(u)) - if (!BN_GF2m_add(u, u, p)) - goto err; - if (!BN_rshift1(u, u)) - goto err; - } while (!BN_is_odd(a)); - } - } while (1); - - if (!BN_copy(r, u)) - goto err; - bn_check_top(r); - ret = 1; - - err: - BN_CTX_end(ctx); - return ret; -} -# endif /* * Divide yy by xx, reduce modulo p, and store the result in r. r could be xx @@ -918,7 +858,7 @@ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, bn_check_top(b); if (BN_is_zero(b)) - return (BN_one(r)); + return BN_one(r); if (BN_abs_is_word(b, 1)) return (BN_copy(r, a) != NULL); @@ -1091,7 +1031,7 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], if (tmp == NULL) goto err; do { - if (!BN_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) + if (!BN_priv_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto err; if (!BN_GF2m_mod_arr(rho, rho, p)) goto err; diff --git a/deps/openssl/openssl/crypto/bn/bn_intern.c b/deps/openssl/openssl/crypto/bn/bn_intern.c index 7b25927f9b..46bc97575d 100644 --- a/deps/openssl/openssl/crypto/bn/bn_intern.c +++ b/deps/openssl/openssl/crypto/bn/bn_intern.c @@ -143,11 +143,6 @@ int bn_get_top(const BIGNUM *a) return a->top; } -void bn_set_top(BIGNUM *a, int top) -{ - a->top = top; -} - int bn_get_dmax(const BIGNUM *a) { return a->dmax; @@ -202,13 +197,3 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words) bn_correct_top(a); return 1; } - -size_t bn_sizeof_BIGNUM(void) -{ - return sizeof(BIGNUM); -} - -BIGNUM *bn_array_el(BIGNUM *base, int el) -{ - return &base[el]; -} diff --git a/deps/openssl/openssl/crypto/bn/bn_lcl.h b/deps/openssl/openssl/crypto/bn/bn_lcl.h index 4d9808f5b8..8a36db2e8b 100644 --- a/deps/openssl/openssl/crypto/bn/bn_lcl.h +++ b/deps/openssl/openssl/crypto/bn/bn_lcl.h @@ -23,10 +23,6 @@ # include "internal/bn_int.h" -#ifdef __cplusplus -extern "C" { -#endif - /* * These preprocessor symbols control various aspects of the bignum headers * and library code. They're not defined by any "normal" configuration, as @@ -156,11 +152,6 @@ extern "C" { */ # define BN_FLG_FIXED_TOP 0x10000 # ifdef BN_DEBUG_RAND -/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */ -# ifndef RAND_bytes -int RAND_bytes(unsigned char *buf, int num); -# define BN_DEBUG_TRIX -# endif # define bn_pollute(a) \ do { \ const BIGNUM *_bnum1 = (a); \ @@ -176,9 +167,6 @@ int RAND_bytes(unsigned char *buf, int num); sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \ } \ } while(0) -# ifdef BN_DEBUG_TRIX -# undef RAND_bytes -# endif # else # define bn_pollute(a) # endif @@ -187,9 +175,9 @@ int RAND_bytes(unsigned char *buf, int num); const BIGNUM *_bnum2 = (a); \ if (_bnum2 != NULL) { \ int _top = _bnum2->top; \ - OPENSSL_assert((_top == 0 && !_bnum2->neg) || \ - (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \ - || _bnum2->d[_top - 1] != 0))); \ + (void)ossl_assert((_top == 0 && !_bnum2->neg) || \ + (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \ + || _bnum2->d[_top - 1] != 0))); \ bn_pollute(_bnum2); \ } \ } while(0) @@ -200,8 +188,8 @@ int RAND_bytes(unsigned char *buf, int num); # define bn_wcheck_size(bn, words) \ do { \ const BIGNUM *_bnum2 = (bn); \ - OPENSSL_assert((words) <= (_bnum2)->dmax && \ - (words) >= (_bnum2)->top); \ + assert((words) <= (_bnum2)->dmax && \ + (words) >= (_bnum2)->top); \ /* avoid unused variable warning with NDEBUG */ \ (void)(_bnum2); \ } while(0) @@ -370,59 +358,58 @@ struct bn_gencb_st { # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) /* * BN_UMULT_HIGH section. - * - * No, I'm not trying to overwhelm you when stating that the - * product of N-bit numbers is 2*N bits wide:-) No, I don't expect - * you to be impressed when I say that if the compiler doesn't - * support 2*N integer type, then you have to replace every N*N - * multiplication with 4 (N/2)*(N/2) accompanied by some shifts - * and additions which unavoidably results in severe performance - * penalties. Of course provided that the hardware is capable of - * producing 2*N result... That's when you normally start - * considering assembler implementation. However! It should be - * pointed out that some CPUs (most notably Alpha, PowerPC and - * upcoming IA-64 family:-) provide *separate* instruction - * calculating the upper half of the product placing the result - * into a general purpose register. Now *if* the compiler supports - * inline assembler, then it's not impossible to implement the - * "bignum" routines (and have the compiler optimize 'em) - * exhibiting "native" performance in C. That's what BN_UMULT_HIGH - * macro is about:-) - * - * <appro@fy.chalmers.se> + * If the compiler doesn't support 2*N integer type, then you have to + * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some + * shifts and additions which unavoidably results in severe performance + * penalties. Of course provided that the hardware is capable of producing + * 2*N result... That's when you normally start considering assembler + * implementation. However! It should be pointed out that some CPUs (e.g., + * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating + * the upper half of the product placing the result into a general + * purpose register. Now *if* the compiler supports inline assembler, + * then it's not impossible to implement the "bignum" routines (and have + * the compiler optimize 'em) exhibiting "native" performance in C. That's + * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do + * support 2*64 integer type, which is also used here. */ -# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) +# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \ + (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) +# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) +# define BN_UMULT_LOHI(low,high,a,b) ({ \ + __uint128_t ret=(__uint128_t)(a)*(b); \ + (high)=ret>>64; (low)=ret; }) +# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__DECC) # include <c_asm.h> # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) # elif defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("umulh %1,%2,%0" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif /* compiler */ -# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG) +# elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("mulhdu %0,%1,%2" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif /* compiler */ # elif (defined(__x86_64) || defined(__x86_64__)) && \ (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret,discard; \ asm ("mulq %3" \ : "=a"(discard),"=d"(ret) \ : "a"(a), "g"(b) \ : "cc"); \ - ret; }) -# define BN_UMULT_LOHI(low,high,a,b) \ + ret; }) +# define BN_UMULT_LOHI(low,high,a,b) \ asm ("mulq %3" \ : "=a"(low),"=d"(high) \ : "a"(a),"g"(b) \ @@ -439,43 +426,29 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, # endif # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) # if defined(__GNUC__) && __GNUC__>=2 -# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 - /* "h" constraint is not an option on R6 and was removed in 4.4 */ -# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) -# define BN_UMULT_LOHI(low,high,a,b) ({ \ - __uint128_t ret=(__uint128_t)(a)*(b); \ - (high)=ret>>64; (low)=ret; }) -# else -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("dmultu %1,%2" \ : "=h"(ret) \ : "r"(a), "r"(b) : "l"); \ ret; }) -# define BN_UMULT_LOHI(low,high,a,b)\ +# define BN_UMULT_LOHI(low,high,a,b) \ asm ("dmultu %2,%3" \ : "=l"(low),"=h"(high) \ : "r"(a), "r"(b)); -# endif # endif # elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) # if defined(__GNUC__) && __GNUC__>=2 -# define BN_UMULT_HIGH(a,b) ({ \ +# define BN_UMULT_HIGH(a,b) ({ \ register BN_ULONG ret; \ asm ("umulh %0,%1,%2" \ : "=r"(ret) \ : "r"(a), "r"(b)); \ - ret; }) + ret; }) # endif # endif /* cpu */ # endif /* OPENSSL_NO_ASM */ -/************************************************************* - * Using the long long type - */ -# define Lw(t) (((BN_ULONG)(t))&BN_MASK2) -# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) - # ifdef BN_DEBUG_RAND # define bn_clear_top2max(a) \ { \ @@ -489,6 +462,12 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, # endif # ifdef BN_LLONG +/******************************************************************* + * Using the long long type, has to be twice as wide as BN_ULONG... + */ +# define Lw(t) (((BN_ULONG)(t))&BN_MASK2) +# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) + # define mul_add(r,a,w,c) { \ BN_ULLONG t; \ t=(BN_ULLONG)w * (a) + (r) + (c); \ @@ -666,10 +645,6 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t); -void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, - BN_ULONG *t); -BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl); BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, @@ -681,8 +656,6 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in, int bn_probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); -int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx); -int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx); static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) { @@ -695,8 +668,4 @@ static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); } -#ifdef __cplusplus -} -#endif - #endif diff --git a/deps/openssl/openssl/crypto/bn/bn_lib.c b/deps/openssl/openssl/crypto/bn/bn_lib.c index 3f3c7bbb2f..80f910c807 100644 --- a/deps/openssl/openssl/crypto/bn/bn_lib.c +++ b/deps/openssl/openssl/crypto/bn/bn_lib.c @@ -66,15 +66,15 @@ void BN_set_params(int mult, int high, int low, int mont) int BN_get_params(int which) { if (which == 0) - return (bn_limit_bits); + return bn_limit_bits; else if (which == 1) - return (bn_limit_bits_high); + return bn_limit_bits_high; else if (which == 2) - return (bn_limit_bits_low); + return bn_limit_bits_low; else if (which == 3) - return (bn_limit_bits_mont); + return bn_limit_bits_mont; else - return (0); + return 0; } #endif @@ -84,7 +84,7 @@ const BIGNUM *BN_value_one(void) static const BIGNUM const_one = { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA }; - return (&const_one); + return &const_one; } int BN_num_bits_word(BN_ULONG l) @@ -153,37 +153,26 @@ static void bn_free_d(BIGNUM *a) void BN_clear_free(BIGNUM *a) { - int i; - if (a == NULL) return; - bn_check_top(a); - if (a->d != NULL) { + if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) { OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0])); - if (!BN_get_flags(a, BN_FLG_STATIC_DATA)) - bn_free_d(a); + bn_free_d(a); } - i = BN_get_flags(a, BN_FLG_MALLOCED); - OPENSSL_cleanse(a, sizeof(*a)); - if (i) + if (BN_get_flags(a, BN_FLG_MALLOCED)) { + OPENSSL_cleanse(a, sizeof(*a)); OPENSSL_free(a); + } } void BN_free(BIGNUM *a) { if (a == NULL) return; - bn_check_top(a); if (!BN_get_flags(a, BN_FLG_STATIC_DATA)) bn_free_d(a); if (a->flags & BN_FLG_MALLOCED) OPENSSL_free(a); - else { -#if OPENSSL_API_COMPAT < 0x00908000L - a->flags |= BN_FLG_FREE; -#endif - a->d = NULL; - } } void bn_init(BIGNUM *a) @@ -200,11 +189,11 @@ BIGNUM *BN_new(void) if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } ret->flags = BN_FLG_MALLOCED; bn_check_top(ret); - return (ret); + return ret; } BIGNUM *BN_secure_new(void) @@ -212,16 +201,14 @@ BIGNUM *BN_new(void) BIGNUM *ret = BN_new(); if (ret != NULL) ret->flags |= BN_FLG_SECURE; - return (ret); + return ret; } /* This is used by bn_expand2() */ /* The caller MUST check that words > b->dmax before calling this */ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) { - BN_ULONG *A, *a = NULL; - const BN_ULONG *B; - int i; + BN_ULONG *a = NULL; if (words > (INT_MAX / (4 * BN_BITS2))) { BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG); @@ -229,62 +216,22 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) } if (BN_get_flags(b, BN_FLG_STATIC_DATA)) { BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA); - return (NULL); + return NULL; } if (BN_get_flags(b, BN_FLG_SECURE)) - a = A = OPENSSL_secure_zalloc(words * sizeof(*a)); + a = OPENSSL_secure_zalloc(words * sizeof(*a)); else - a = A = OPENSSL_zalloc(words * sizeof(*a)); - if (A == NULL) { + a = OPENSSL_zalloc(words * sizeof(*a)); + if (a == NULL) { BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } -#if 1 - B = b->d; - /* Check if the previous number needs to be copied */ - if (B != NULL) { - for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) { - /* - * The fact that the loop is unrolled - * 4-wise is a tribute to Intel. It's - * the one that doesn't have enough - * registers to accommodate more data. - * I'd unroll it 8-wise otherwise:-) - * - * <appro@fy.chalmers.se> - */ - BN_ULONG a0, a1, a2, a3; - a0 = B[0]; - a1 = B[1]; - a2 = B[2]; - a3 = B[3]; - A[0] = a0; - A[1] = a1; - A[2] = a2; - A[3] = a3; - } - switch (b->top & 3) { - case 3: - A[2] = B[2]; - /* fall thru */ - case 2: - A[1] = B[1]; - /* fall thru */ - case 1: - A[0] = B[0]; - /* fall thru */ - case 0: - /* Without the "case 0" some old optimizers got this wrong. */ - ; - } - } -#else - memset(A, 0, sizeof(*A) * words); - memcpy(A, b->d, sizeof(b->d[0]) * b->top); -#endif + assert(b->top <= words); + if (b->top > 0) + memcpy(a, b->d, sizeof(*a) * b->top); - return (a); + return a; } /* @@ -333,53 +280,21 @@ BIGNUM *BN_dup(const BIGNUM *a) BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) { - int i; - BN_ULONG *A; - const BN_ULONG *B; - bn_check_top(b); if (a == b) - return (a); + return a; if (bn_wexpand(a, b->top) == NULL) - return (NULL); - -#if 1 - A = a->d; - B = b->d; - for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) { - BN_ULONG a0, a1, a2, a3; - a0 = B[0]; - a1 = B[1]; - a2 = B[2]; - a3 = B[3]; - A[0] = a0; - A[1] = a1; - A[2] = a2; - A[3] = a3; - } - /* ultrix cc workaround, see comments in bn_expand_internal */ - switch (b->top & 3) { - case 3: - A[2] = B[2]; - /* fall thru */ - case 2: - A[1] = B[1]; - /* fall thru */ - case 1: - A[0] = B[0]; - /* fall thru */ - case 0:; - } -#else - memcpy(a->d, b->d, sizeof(b->d[0]) * b->top); -#endif + return NULL; + + if (b->top > 0) + memcpy(a->d, b->d, sizeof(b->d[0]) * b->top); a->neg = b->neg; a->top = b->top; a->flags |= b->flags & BN_FLG_FIXED_TOP; bn_check_top(a); - return (a); + return a; } #define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \ @@ -445,13 +360,13 @@ int BN_set_word(BIGNUM *a, BN_ULONG w) { bn_check_top(a); if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL) - return (0); + return 0; a->neg = 0; a->d[0] = w; a->top = (w ? 1 : 0); a->flags &= ~BN_FLG_FIXED_TOP; bn_check_top(a); - return (1); + return 1; } BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) @@ -464,7 +379,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) if (ret == NULL) ret = bn = BN_new(); if (ret == NULL) - return (NULL); + return NULL; bn_check_top(ret); /* Skip leading zero's. */ for ( ; len > 0 && *s == 0; s++, len--) @@ -472,7 +387,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) n = len; if (n == 0) { ret->top = 0; - return (ret); + return ret; } i = ((n - 1) / BN_BYTES) + 1; m = ((n - 1) % (BN_BYTES)); @@ -496,7 +411,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret) * bit set (-ve number) */ bn_correct_top(ret); - return (ret); + return ret; } /* ignore negative */ @@ -564,7 +479,7 @@ BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret) if (ret == NULL) ret = bn = BN_new(); if (ret == NULL) - return (NULL); + return NULL; bn_check_top(ret); s += len; /* Skip trailing zeroes. */ @@ -631,7 +546,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) i = a->top - b->top; if (i != 0) - return (i); + return i; ap = a->d; bp = b->d; for (i = a->top - 1; i >= 0; i--) { @@ -640,7 +555,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b) if (t1 != t2) return ((t1 > t2) ? 1 : -1); } - return (0); + return 0; } int BN_cmp(const BIGNUM *a, const BIGNUM *b) @@ -651,11 +566,11 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) if ((a == NULL) || (b == NULL)) { if (a != NULL) - return (-1); + return -1; else if (b != NULL) - return (1); + return 1; else - return (0); + return 0; } bn_check_top(a); @@ -663,9 +578,9 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) if (a->neg != b->neg) { if (a->neg) - return (-1); + return -1; else - return (1); + return 1; } if (a->neg == 0) { gt = 1; @@ -676,18 +591,18 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b) } if (a->top > b->top) - return (gt); + return gt; if (a->top < b->top) - return (lt); + return lt; for (i = a->top - 1; i >= 0; i--) { t1 = a->d[i]; t2 = b->d[i]; if (t1 > t2) - return (gt); + return gt; if (t1 < t2) - return (lt); + return lt; } - return (0); + return 0; } int BN_set_bit(BIGNUM *a, int n) @@ -701,7 +616,7 @@ int BN_set_bit(BIGNUM *a, int n) j = n % BN_BITS2; if (a->top <= i) { if (bn_wexpand(a, i + 1) == NULL) - return (0); + return 0; for (k = a->top; k < i + 1; k++) a->d[k] = 0; a->top = i + 1; @@ -710,7 +625,7 @@ int BN_set_bit(BIGNUM *a, int n) a->d[i] |= (((BN_ULONG)1) << j); bn_check_top(a); - return (1); + return 1; } int BN_clear_bit(BIGNUM *a, int n) @@ -724,11 +639,11 @@ int BN_clear_bit(BIGNUM *a, int n) i = n / BN_BITS2; j = n % BN_BITS2; if (a->top <= i) - return (0); + return 0; a->d[i] &= (~(((BN_ULONG)1) << j)); bn_correct_top(a); - return (1); + return 1; } int BN_is_bit_set(const BIGNUM *a, int n) @@ -764,7 +679,7 @@ int BN_mask_bits(BIGNUM *a, int n) a->d[w] &= ~(BN_MASK2 << b); } bn_correct_top(a); - return (1); + return 1; } void BN_set_negative(BIGNUM *a, int b) @@ -790,7 +705,7 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n) if (aa != bb) return ((aa > bb) ? 1 : -1); } - return (0); + return 0; } /* @@ -1000,7 +915,7 @@ BN_GENCB *BN_GENCB_new(void) if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE); - return (NULL); + return NULL; } return ret; diff --git a/deps/openssl/openssl/crypto/bn/bn_mod.c b/deps/openssl/openssl/crypto/bn/bn_mod.c index 2e98035bd8..712fc8ac14 100644 --- a/deps/openssl/openssl/crypto/bn/bn_mod.c +++ b/deps/openssl/openssl/crypto/bn/bn_mod.c @@ -216,7 +216,7 @@ int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, ret = 1; err: BN_CTX_end(ctx); - return (ret); + return ret; } int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) diff --git a/deps/openssl/openssl/crypto/bn/bn_mont.c b/deps/openssl/openssl/crypto/bn/bn_mont.c index 41214334b8..393d27c392 100644 --- a/deps/openssl/openssl/crypto/bn/bn_mont.c +++ b/deps/openssl/openssl/crypto/bn/bn_mont.c @@ -44,12 +44,12 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, #if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) if (num > 1 && a->top == num && b->top == num) { if (bn_wexpand(r, num) == NULL) - return (0); + return 0; if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { r->neg = a->neg ^ b->neg; r->top = num; r->flags |= BN_FLG_FIXED_TOP; - return (1); + return 1; } } #endif @@ -81,7 +81,7 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, ret = 1; err: BN_CTX_end(ctx); - return (ret); + return ret; } #ifdef MONT_WORD @@ -96,17 +96,18 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) nl = n->top; if (nl == 0) { ret->top = 0; - return (1); + return 1; } max = (2 * nl); /* carry is stored separately */ if (bn_wexpand(r, max) == NULL) - return (0); + return 0; r->neg ^= n->neg; np = n->d; rp = r->d; + /* clear the top words of T */ for (rtop = r->top, i = 0; i < max; i++) { v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1)); rp[i] &= v; @@ -130,7 +131,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) } if (bn_wexpand(ret, nl) == NULL) - return (0); + return 0; ret->top = nl; ret->flags |= BN_FLG_FIXED_TOP; ret->neg = r->neg; @@ -154,7 +155,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) ap[i] = 0; } - return (1); + return 1; } #endif /* MONT_WORD */ @@ -188,7 +189,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_CTX_start(ctx); t1 = BN_CTX_get(ctx); t2 = BN_CTX_get(ctx); - if (t1 == NULL || t2 == NULL) + if (t2 == NULL) goto err; if (!BN_copy(t1, a)) @@ -215,7 +216,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, err: BN_CTX_end(ctx); #endif /* MONT_WORD */ - return (retn); + return retn; } int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, @@ -228,20 +229,22 @@ BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret; - if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) - return (NULL); + if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { + BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } BN_MONT_CTX_init(ret); ret->flags = BN_FLG_MALLOCED; - return (ret); + return ret; } void BN_MONT_CTX_init(BN_MONT_CTX *ctx) { ctx->ri = 0; - bn_init(&(ctx->RR)); - bn_init(&(ctx->N)); - bn_init(&(ctx->Ni)); + bn_init(&ctx->RR); + bn_init(&ctx->N); + bn_init(&ctx->Ni); ctx->n0[0] = ctx->n0[1] = 0; ctx->flags = 0; } @@ -250,10 +253,9 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) { if (mont == NULL) return; - - BN_clear_free(&(mont->RR)); - BN_clear_free(&(mont->N)); - BN_clear_free(&(mont->Ni)); + BN_clear_free(&mont->RR); + BN_clear_free(&mont->N); + BN_clear_free(&mont->Ni); if (mont->flags & BN_FLG_MALLOCED) OPENSSL_free(mont); } @@ -409,7 +411,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) { if (to == from) - return (to); + return to; if (!BN_copy(&(to->RR), &(from->RR))) return NULL; @@ -420,7 +422,7 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) to->ri = from->ri; to->n0[0] = from->n0[0]; to->n0[1] = from->n0[1]; - return (to); + return to; } BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock, diff --git a/deps/openssl/openssl/crypto/bn/bn_mul.c b/deps/openssl/openssl/crypto/bn/bn_mul.c index 237d7df106..5eda65cfbb 100644 --- a/deps/openssl/openssl/crypto/bn/bn_mul.c +++ b/deps/openssl/openssl/crypto/bn/bn_mul.c @@ -154,170 +154,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, } #endif -BN_ULONG bn_add_part_words(BN_ULONG *r, - const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl) -{ - BN_ULONG c, l, t; - - assert(cl >= 0); - c = bn_add_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) { - int save_dl = dl; - while (c) { - l = (c + b[0]) & BN_MASK2; - c = (l < c); - r[0] = l; - if (++dl >= 0) - break; - - l = (c + b[1]) & BN_MASK2; - c = (l < c); - r[1] = l; - if (++dl >= 0) - break; - - l = (c + b[2]) & BN_MASK2; - c = (l < c); - r[2] = l; - if (++dl >= 0) - break; - - l = (c + b[3]) & BN_MASK2; - c = (l < c); - r[3] = l; - if (++dl >= 0) - break; - - save_dl = dl; - b += 4; - r += 4; - } - if (dl < 0) { - if (save_dl < dl) { - switch (dl - save_dl) { - case 1: - r[1] = b[1]; - if (++dl >= 0) - break; - /* fall thru */ - case 2: - r[2] = b[2]; - if (++dl >= 0) - break; - /* fall thru */ - case 3: - r[3] = b[3]; - if (++dl >= 0) - break; - } - b += 4; - r += 4; - } - } - if (dl < 0) { - for (;;) { - r[0] = b[0]; - if (++dl >= 0) - break; - r[1] = b[1]; - if (++dl >= 0) - break; - r[2] = b[2]; - if (++dl >= 0) - break; - r[3] = b[3]; - if (++dl >= 0) - break; - - b += 4; - r += 4; - } - } - } else { - int save_dl = dl; - while (c) { - t = (a[0] + c) & BN_MASK2; - c = (t < c); - r[0] = t; - if (--dl <= 0) - break; - - t = (a[1] + c) & BN_MASK2; - c = (t < c); - r[1] = t; - if (--dl <= 0) - break; - - t = (a[2] + c) & BN_MASK2; - c = (t < c); - r[2] = t; - if (--dl <= 0) - break; - - t = (a[3] + c) & BN_MASK2; - c = (t < c); - r[3] = t; - if (--dl <= 0) - break; - - save_dl = dl; - a += 4; - r += 4; - } - if (dl > 0) { - if (save_dl > dl) { - switch (save_dl - dl) { - case 1: - r[1] = a[1]; - if (--dl <= 0) - break; - /* fall thru */ - case 2: - r[2] = a[2]; - if (--dl <= 0) - break; - /* fall thru */ - case 3: - r[3] = a[3]; - if (--dl <= 0) - break; - } - a += 4; - r += 4; - } - } - if (dl > 0) { - for (;;) { - r[0] = a[0]; - if (--dl <= 0) - break; - r[1] = a[1]; - if (--dl <= 0) - break; - r[2] = a[2]; - if (--dl <= 0) - break; - r[3] = a[3]; - if (--dl <= 0) - break; - - a += 4; - r += 4; - } - } - } - return c; -} - #ifdef BN_RECURSION /* * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of @@ -505,7 +341,6 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ break; case -3: - /* break; */ case -2: bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ @@ -514,14 +349,12 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, case -1: case 0: case 1: - /* break; */ case 2: bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ neg = 1; break; case 3: - /* break; */ case 4: bn_sub_part_words(t, a, &(a[n]), tna, n - tna); bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); @@ -659,176 +492,6 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); } } - -/*- - * a and b must be the same size, which is n2. - * r needs to be n2 words and t needs to be n2*2 - * l is the low words of the output. - * t needs to be n2*3 - */ -void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2, - BN_ULONG *t) -{ - int i, n; - int c1, c2; - int neg, oneg, zero; - BN_ULONG ll, lc, *lp, *mp; - - n = n2 / 2; - - /* Calculate (al-ah)*(bh-bl) */ - neg = zero = 0; - c1 = bn_cmp_words(&(a[0]), &(a[n]), n); - c2 = bn_cmp_words(&(b[n]), &(b[0]), n); - switch (c1 * 3 + c2) { - case -4: - bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); - bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); - break; - case -3: - zero = 1; - break; - case -2: - bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n); - bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); - neg = 1; - break; - case -1: - case 0: - case 1: - zero = 1; - break; - case 2: - bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); - bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n); - neg = 1; - break; - case 3: - zero = 1; - break; - case 4: - bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n); - bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n); - break; - } - - oneg = neg; - /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */ - /* r[10] = (a[1]*b[1]) */ -# ifdef BN_MUL_COMBA - if (n == 8) { - bn_mul_comba8(&(t[0]), &(r[0]), &(r[n])); - bn_mul_comba8(r, &(a[n]), &(b[n])); - } else -# endif - { - bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2])); - bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2])); - } - - /*- - * s0 == low(al*bl) - * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl) - * We know s0 and s1 so the only unknown is high(al*bl) - * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl)) - * high(al*bl) == s1 - (r[0]+l[0]+t[0]) - */ - if (l != NULL) { - lp = &(t[n2 + n]); - bn_add_words(lp, &(r[0]), &(l[0]), n); - } else { - lp = &(r[0]); - } - - if (neg) - neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n)); - else { - bn_add_words(&(t[n2]), lp, &(t[0]), n); - neg = 0; - } - - if (l != NULL) { - bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n); - } else { - lp = &(t[n2 + n]); - mp = &(t[n2]); - for (i = 0; i < n; i++) - lp[i] = ((~mp[i]) + 1) & BN_MASK2; - } - - /*- - * s[0] = low(al*bl) - * t[3] = high(al*bl) - * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign - * r[10] = (a[1]*b[1]) - */ - /*- - * R[10] = al*bl - * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0]) - * R[32] = ah*bh - */ - /*- - * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow) - * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow) - * R[3]=r[1]+(carry/borrow) - */ - if (l != NULL) { - lp = &(t[n2]); - c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n)); - } else { - lp = &(t[n2 + n]); - c1 = 0; - } - c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n)); - if (oneg) - c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n)); - else - c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n)); - - c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n)); - c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n)); - if (oneg) - c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n)); - else - c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n)); - - if (c1 != 0) { /* Add starting at r[0], could be +ve or -ve */ - i = 0; - if (c1 > 0) { - lc = c1; - do { - ll = (r[i] + lc) & BN_MASK2; - r[i++] = ll; - lc = (lc > ll); - } while (lc); - } else { - lc = -c1; - do { - ll = r[i]; - r[i++] = (ll - lc) & BN_MASK2; - lc = (lc > ll); - } while (lc); - } - } - if (c2 != 0) { /* Add starting at r[1] */ - i = n; - if (c2 > 0) { - lc = c2; - do { - ll = (r[i] + lc) & BN_MASK2; - r[i++] = ll; - lc = (lc > ll); - } while (lc); - } else { - lc = -c2; - do { - ll = r[i]; - r[i++] = (ll - lc) & BN_MASK2; - lc = (lc > ll); - } while (lc); - } - } -} #endif /* BN_RECURSION */ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) @@ -863,7 +526,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) if ((al == 0) || (bl == 0)) { BN_zero(r); - return (1); + return 1; } top = al + bl; @@ -953,7 +616,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) err: bn_check_top(r); BN_CTX_end(ctx); - return (ret); + return ret; } void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) diff --git a/deps/openssl/openssl/crypto/bn/bn_nist.c b/deps/openssl/openssl/crypto/bn/bn_nist.c index 53598f97ef..dcdd321c66 100644 --- a/deps/openssl/openssl/crypto/bn/bn_nist.c +++ b/deps/openssl/openssl/crypto/bn/bn_nist.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -254,7 +254,7 @@ static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max) int i; #ifdef BN_DEBUG - OPENSSL_assert(top <= max); + (void)ossl_assert(top <= max); #endif for (i = 0; i < top; i++) dst[i] = src[i]; diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.c b/deps/openssl/openssl/crypto/bn/bn_prime.c index 616389cfa6..b91b31b1f3 100644 --- a/deps/openssl/openssl/crypto/bn/bn_prime.c +++ b/deps/openssl/openssl/crypto/bn/bn_prime.c @@ -1,7 +1,5 @@ /* - * WARNING: do not edit! - * Generated by crypto/bn/bn_prime.pl - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -29,53 +27,6 @@ static int probable_prime_dh_safe(BIGNUM *rnd, int bits, const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx); -static const int prime_offsets[480] = { - 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, - 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, - 167, 169, 173, 179, 181, 191, 193, 197, 199, 211, 221, 223, 227, 229, - 233, 239, 241, 247, 251, 257, 263, 269, 271, 277, 281, 283, 289, 293, - 299, 307, 311, 313, 317, 323, 331, 337, 347, 349, 353, 359, 361, 367, - 373, 377, 379, 383, 389, 391, 397, 401, 403, 409, 419, 421, 431, 433, - 437, 439, 443, 449, 457, 461, 463, 467, 479, 481, 487, 491, 493, 499, - 503, 509, 521, 523, 527, 529, 533, 541, 547, 551, 557, 559, 563, 569, - 571, 577, 587, 589, 593, 599, 601, 607, 611, 613, 617, 619, 629, 631, - 641, 643, 647, 653, 659, 661, 667, 673, 677, 683, 689, 691, 697, 701, - 703, 709, 713, 719, 727, 731, 733, 739, 743, 751, 757, 761, 767, 769, - 773, 779, 787, 793, 797, 799, 809, 811, 817, 821, 823, 827, 829, 839, - 841, 851, 853, 857, 859, 863, 871, 877, 881, 883, 887, 893, 899, 901, - 907, 911, 919, 923, 929, 937, 941, 943, 947, 949, 953, 961, 967, 971, - 977, 983, 989, 991, 997, 1003, 1007, 1009, 1013, 1019, 1021, 1027, 1031, - 1033, 1037, 1039, 1049, 1051, 1061, 1063, 1069, 1073, 1079, 1081, 1087, - 1091, 1093, 1097, 1103, 1109, 1117, 1121, 1123, 1129, 1139, 1147, 1151, - 1153, 1157, 1159, 1163, 1171, 1181, 1187, 1189, 1193, 1201, 1207, 1213, - 1217, 1219, 1223, 1229, 1231, 1237, 1241, 1247, 1249, 1259, 1261, 1271, - 1273, 1277, 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, 1313, 1319, - 1321, 1327, 1333, 1339, 1343, 1349, 1357, 1361, 1363, 1367, 1369, 1373, - 1381, 1387, 1391, 1399, 1403, 1409, 1411, 1417, 1423, 1427, 1429, 1433, - 1439, 1447, 1451, 1453, 1457, 1459, 1469, 1471, 1481, 1483, 1487, 1489, - 1493, 1499, 1501, 1511, 1513, 1517, 1523, 1531, 1537, 1541, 1543, 1549, - 1553, 1559, 1567, 1571, 1577, 1579, 1583, 1591, 1597, 1601, 1607, 1609, - 1613, 1619, 1621, 1627, 1633, 1637, 1643, 1649, 1651, 1657, 1663, 1667, - 1669, 1679, 1681, 1691, 1693, 1697, 1699, 1703, 1709, 1711, 1717, 1721, - 1723, 1733, 1739, 1741, 1747, 1751, 1753, 1759, 1763, 1769, 1777, 1781, - 1783, 1787, 1789, 1801, 1807, 1811, 1817, 1819, 1823, 1829, 1831, 1843, - 1847, 1849, 1853, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1891, 1901, - 1907, 1909, 1913, 1919, 1921, 1927, 1931, 1933, 1937, 1943, 1949, 1951, - 1957, 1961, 1963, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, - 2021, 2027, 2029, 2033, 2039, 2041, 2047, 2053, 2059, 2063, 2069, 2071, - 2077, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2117, 2119, 2129, 2131, - 2137, 2141, 2143, 2147, 2153, 2159, 2161, 2171, 2173, 2179, 2183, 2197, - 2201, 2203, 2207, 2209, 2213, 2221, 2227, 2231, 2237, 2239, 2243, 2249, - 2251, 2257, 2263, 2267, 2269, 2273, 2279, 2281, 2287, 2291, 2293, 2297, - 2309, 2311 -}; - -static const int prime_offset_count = 480; -static const int prime_multiplier = 2310; -static const int prime_multiplier_bits = 11; /* 2^|prime_multiplier_bits| <= - * |prime_multiplier| */ -static const int first_prime_index = 5; - int BN_GENCB_call(BN_GENCB *cb, int a, int b) { /* No callback means continue */ @@ -127,7 +78,7 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe, goto err; BN_CTX_start(ctx); t = BN_CTX_get(ctx); - if (!t) + if (t == NULL) goto err; loop: /* make a random number and set the top and bottom bits */ @@ -203,26 +154,28 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, int i, j, ret = -1; int k; BN_CTX *ctx = NULL; - BIGNUM *A1, *A1_odd, *check; /* taken from ctx */ + BIGNUM *A1, *A1_odd, *A3, *check; /* taken from ctx */ BN_MONT_CTX *mont = NULL; - if (BN_cmp(a, BN_value_one()) <= 0) + /* Take care of the really small primes 2 & 3 */ + if (BN_is_word(a, 2) || BN_is_word(a, 3)) + return 1; + + /* Check odd and bigger than 1 */ + if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0) return 0; if (checks == BN_prime_checks) checks = BN_prime_checks_for_size(BN_num_bits(a)); /* first look for small factors */ - if (!BN_is_odd(a)) - /* a is even => a is prime if and only if a == 2 */ - return BN_is_word(a, 2); if (do_trial_division) { for (i = 1; i < NUMPRIMES; i++) { BN_ULONG mod = BN_mod_word(a, primes[i]); if (mod == (BN_ULONG)-1) goto err; if (mod == 0) - return 0; + return BN_is_word(a, primes[i]); } if (!BN_GENCB_call(cb, 1, -1)) goto err; @@ -235,20 +188,18 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_CTX_start(ctx); A1 = BN_CTX_get(ctx); + A3 = BN_CTX_get(ctx); A1_odd = BN_CTX_get(ctx); check = BN_CTX_get(ctx); if (check == NULL) goto err; /* compute A1 := a - 1 */ - if (!BN_copy(A1, a)) + if (!BN_copy(A1, a) || !BN_sub_word(A1, 1)) goto err; - if (!BN_sub_word(A1, 1)) + /* compute A3 := a - 3 */ + if (!BN_copy(A3, a) || !BN_sub_word(A3, 3)) goto err; - if (BN_is_zero(A1)) { - ret = 0; - goto err; - } /* write A1 as A1_odd * 2^k */ k = 1; @@ -265,11 +216,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, goto err; for (i = 0; i < checks; i++) { - if (!BN_pseudo_rand_range(check, A1)) + /* 1 < check < a-1 */ + if (!BN_priv_rand_range(check, A3) || !BN_add_word(check, 2)) goto err; - if (!BN_add_word(check, 1)) - goto err; - /* now 1 <= check < a */ j = witness(check, a, A1, A1_odd, k, ctx, mont); if (j == -1) @@ -290,83 +239,6 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, } BN_MONT_CTX_free(mont); - return (ret); -} - -int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx) -{ - int i; - int ret = 0; - - loop: - if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD)) - goto err; - - /* we now have a random number 'rand' to test. */ - - for (i = 1; i < NUMPRIMES; i++) { - /* check that rnd is a prime */ - BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]); - if (mod == (BN_ULONG)-1) - goto err; - if (mod <= 1) { - goto loop; - } - } - ret = 1; - - err: - bn_check_top(rnd); - return (ret); -} - -int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx) -{ - int i; - BIGNUM *offset_index; - BIGNUM *offset_count; - int ret = 0; - - OPENSSL_assert(bits > prime_multiplier_bits); - - BN_CTX_start(ctx); - if ((offset_index = BN_CTX_get(ctx)) == NULL) - goto err; - if ((offset_count = BN_CTX_get(ctx)) == NULL) - goto err; - - if (!BN_add_word(offset_count, prime_offset_count)) - goto err; - - loop: - if (!BN_rand(rnd, bits - prime_multiplier_bits, - BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD)) - goto err; - if (BN_is_bit_set(rnd, bits)) - goto loop; - if (!BN_rand_range(offset_index, offset_count)) - goto err; - - if (!BN_mul_word(rnd, prime_multiplier) - || !BN_add_word(rnd, prime_offsets[BN_get_word(offset_index)])) - goto err; - - /* we now have a random number 'rand' to test. */ - - /* skip coprimes */ - for (i = first_prime_index; i < NUMPRIMES; i++) { - /* check that rnd is a prime */ - BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]); - if (mod == (BN_ULONG)-1) - goto err; - if (mod <= 1) - goto loop; - } - ret = 1; - - err: - BN_CTX_end(ctx); - bn_check_top(rnd); return ret; } @@ -405,8 +277,9 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods) char is_single_word = bits <= BN_BITS2; again: - if (!BN_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD)) - return (0); + /* TODO: Not all primes are private */ + if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD)) + return 0; /* we now have a random number 'rnd' to test. */ for (i = 1; i < NUMPRIMES; i++) { BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]); @@ -472,11 +345,11 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods) } } if (!BN_add_word(rnd, delta)) - return (0); + return 0; if (BN_num_bits(rnd) != bits) goto again; bn_check_top(rnd); - return (1); + return 1; } int bn_probable_prime_dh(BIGNUM *rnd, int bits, @@ -525,7 +398,7 @@ int bn_probable_prime_dh(BIGNUM *rnd, int bits, err: BN_CTX_end(ctx); bn_check_top(rnd); - return (ret); + return ret; } static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, @@ -592,5 +465,5 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd, err: BN_CTX_end(ctx); bn_check_top(p); - return (ret); + return ret; } diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.h b/deps/openssl/openssl/crypto/bn/bn_prime.h index 41440fa4e1..a64c9630f3 100644 --- a/deps/openssl/openssl/crypto/bn/bn_prime.h +++ b/deps/openssl/openssl/crypto/bn/bn_prime.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by crypto/bn/bn_prime.pl * - * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -14,261 +14,260 @@ typedef unsigned short prime_t; # define NUMPRIMES 2048 static const prime_t primes[2048] = { - - 2, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, - 59, 61, 67, 71, 73, 79, 83, 89, - 97, 101, 103, 107, 109, 113, 127, 131, - 137, 139, 149, 151, 157, 163, 167, 173, - 179, 181, 191, 193, 197, 199, 211, 223, - 227, 229, 233, 239, 241, 251, 257, 263, - 269, 271, 277, 281, 283, 293, 307, 311, - 313, 317, 331, 337, 347, 349, 353, 359, - 367, 373, 379, 383, 389, 397, 401, 409, - 419, 421, 431, 433, 439, 443, 449, 457, - 461, 463, 467, 479, 487, 491, 499, 503, - 509, 521, 523, 541, 547, 557, 563, 569, - 571, 577, 587, 593, 599, 601, 607, 613, - 617, 619, 631, 641, 643, 647, 653, 659, - 661, 673, 677, 683, 691, 701, 709, 719, - 727, 733, 739, 743, 751, 757, 761, 769, - 773, 787, 797, 809, 811, 821, 823, 827, - 829, 839, 853, 857, 859, 863, 877, 881, - 883, 887, 907, 911, 919, 929, 937, 941, - 947, 953, 967, 971, 977, 983, 991, 997, - 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, - 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, - 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, - 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, - 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, - 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, - 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, - 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, - 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, - 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, - 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, - 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, - 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, - 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, - 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, - 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, - 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, - 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, - 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, - 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, - 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, - 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, - 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, - 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, - 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, - 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, - 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, - 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, - 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, - 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, - 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, - 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, - 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, - 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, - 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, - 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, - 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, - 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, - 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, - 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, - 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, - 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, - 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, - 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, - 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, - 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, - 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, - 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, - 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, - 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, - 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, - 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, - 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, - 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, - 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, - 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, - 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, - 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, - 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, - 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, - 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, - 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, - 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, - 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, - 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, - 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, - 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, - 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, - 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, - 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, - 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, - 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, - 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, - 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, - 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, - 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, - 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, - 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, - 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, - 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, - 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, - 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, - 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, - 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, - 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, - 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, - 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, - 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, - 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, - 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, - 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, - 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, - 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, - 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, - 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, - 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, - 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, - 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, - 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, - 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, - 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, - 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, - 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, - 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, - 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, - 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, - 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, - 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, - 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, - 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, - 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, - 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, - 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, - 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, - 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, - 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, - 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, - 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, - 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, - 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, - 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, - 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, - 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, - 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, - 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, - 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, - 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, - 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, - 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, - 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, - 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, - 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, - 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, - 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, - 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, - 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, - 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, - 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, - 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, - 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, - 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, - 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, - 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, - 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, - 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, - 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, - 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, - 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, - 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, - 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, - 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, - 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, - 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, - 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, - 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, - 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, - 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, - 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, - 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, - 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, - 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, - 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, - 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, - 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, - 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, - 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, - 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, - 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, - 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, - 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, - 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, - 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, - 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, - 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, - 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, - 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, - 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, - 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, - 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, - 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, - 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, - 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, - 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, - 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, - 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, - 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, - 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, - 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, - 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, - 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, - 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, - 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, - 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, - 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, - 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, - 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, - 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, - 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, - 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, - 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, - 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, - 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, - 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, - 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, - 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, - 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, - 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, - 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, - 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, - 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, - 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, - 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, - 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, - 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, - 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, - 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, - 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, - 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, - 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, - 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, - 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, - 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, - 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, - 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, - 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, - 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, - 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, - 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, - 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, - 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, - 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, - 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, - 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, - 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, - 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, + 2, 3, 5, 7, 11, 13, 17, 19, + 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, + 97, 101, 103, 107, 109, 113, 127, 131, + 137, 139, 149, 151, 157, 163, 167, 173, + 179, 181, 191, 193, 197, 199, 211, 223, + 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, + 313, 317, 331, 337, 347, 349, 353, 359, + 367, 373, 379, 383, 389, 397, 401, 409, + 419, 421, 431, 433, 439, 443, 449, 457, + 461, 463, 467, 479, 487, 491, 499, 503, + 509, 521, 523, 541, 547, 557, 563, 569, + 571, 577, 587, 593, 599, 601, 607, 613, + 617, 619, 631, 641, 643, 647, 653, 659, + 661, 673, 677, 683, 691, 701, 709, 719, + 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, + 829, 839, 853, 857, 859, 863, 877, 881, + 883, 887, 907, 911, 919, 929, 937, 941, + 947, 953, 967, 971, 977, 983, 991, 997, + 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, + 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, + 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, + 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, + 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, + 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, + 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, + 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, + 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, + 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, + 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, + 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, + 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, + 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, + 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, + 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, + 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, + 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, + 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, + 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, + 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, + 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, + 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, + 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, + 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, + 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, + 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, + 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, + 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, + 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, + 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, + 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, + 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, + 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, + 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, + 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, + 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, + 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, + 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, + 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, + 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, + 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, + 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, + 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, + 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, + 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, + 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, + 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, + 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, + 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, + 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, + 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, + 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, + 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, + 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, + 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, + 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, + 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, + 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, + 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, + 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, + 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, + 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, + 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, + 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, + 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, + 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, + 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, + 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, + 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, + 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, + 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, + 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, + 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, + 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, + 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, + 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, + 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, + 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, + 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, + 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, + 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, + 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, + 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, + 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, + 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, + 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, + 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, + 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, + 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, + 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, + 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, + 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, + 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, + 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, + 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, + 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, + 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, + 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, + 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, + 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, + 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, + 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, + 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, + 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, + 8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, + 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, + 8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, + 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, + 8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, + 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, + 8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, + 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, + 8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, + 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, + 8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, + 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, + 8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, + 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, + 9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, + 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, + 9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, + 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, + 9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, + 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, + 9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, + 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, + 9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, + 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, + 9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, + 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, + 9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, + 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, + 10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, + 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, + 10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, + 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, + 10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, + 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, + 10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, + 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, + 10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, + 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, + 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, + 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, + 10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, + 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, + 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, + 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, + 11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, + 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, + 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, + 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, + 11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, + 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, + 11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, + 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, + 11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, + 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, + 12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, + 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, + 12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, + 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, + 12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, + 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, + 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, + 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, + 12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, + 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, + 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, + 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, + 12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, + 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, + 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, + 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, + 13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, + 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, + 13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, + 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, + 13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, + 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, + 13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, + 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, + 13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, + 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, + 13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, + 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, + 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, + 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, + 14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, + 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, + 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, + 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, + 14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, + 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, + 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, + 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, + 14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, + 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, + 15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, + 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, + 15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, + 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, + 15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, + 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, + 15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, + 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, + 15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, + 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, + 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, + 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, + 15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, + 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, + 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, + 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, + 16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, + 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, + 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, + 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, + 16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, + 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, + 16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, + 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, + 16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, + 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, + 17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, + 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, + 17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, + 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, + 17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, + 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, + 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, + 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, + 17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, + 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, }; diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.pl b/deps/openssl/openssl/crypto/bn/bn_prime.pl index 163d4a9d30..eeca475b93 100644 --- a/deps/openssl/openssl/crypto/bn/bn_prime.pl +++ b/deps/openssl/openssl/crypto/bn/bn_prime.pl @@ -1,17 +1,19 @@ #! /usr/bin/env perl -# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html +# Output year depends on the year of the script. +my $YEAR = [localtime([stat($0)]->[9])]->[5] + 1900; print <<"EOF"; /* * WARNING: do not edit! * Generated by crypto/bn/bn_prime.pl * - * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -38,9 +40,9 @@ loop: while ($#primes < $num-1) { print "typedef unsigned short prime_t;\n"; printf "# define NUMPRIMES %d\n\n", $num; -printf "static const prime_t primes[%d] = {\n", $num; +printf "static const prime_t primes[%d] = {", $num; for (my $i = 0; $i <= $#primes; $i++) { - printf "\n " if ($i % 8) == 0; - printf "%4d, ", $primes[$i]; + printf "\n " if ($i % 8) == 0; + printf " %5d,", $primes[$i]; } print "\n};\n"; diff --git a/deps/openssl/openssl/crypto/bn/bn_print.c b/deps/openssl/openssl/crypto/bn/bn_print.c index 5ffe2fc9ba..1853269d90 100644 --- a/deps/openssl/openssl/crypto/bn/bn_print.c +++ b/deps/openssl/openssl/crypto/bn/bn_print.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -8,7 +8,7 @@ */ #include <stdio.h> -#include <ctype.h> +#include "internal/ctype.h" #include <limits.h> #include "internal/cryptlib.h" #include <openssl/buffer.h> @@ -32,27 +32,27 @@ char *BN_bn2hex(const BIGNUM *a) } p = buf; if (a->neg) - *(p++) = '-'; + *p++ = '-'; for (i = a->top - 1; i >= 0; i--) { for (j = BN_BITS2 - 8; j >= 0; j -= 8) { /* strip leading zeros */ - v = ((int)(a->d[i] >> (long)j)) & 0xff; - if (z || (v != 0)) { - *(p++) = Hex[v >> 4]; - *(p++) = Hex[v & 0x0f]; + v = (int)((a->d[i] >> j) & 0xff); + if (z || v != 0) { + *p++ = Hex[v >> 4]; + *p++ = Hex[v & 0x0f]; z = 1; } } } *p = '\0'; err: - return (buf); + return buf; } /* Must 'OPENSSL_free' the returned data */ char *BN_bn2dec(const BIGNUM *a) { - int i = 0, num, ok = 0; + int i = 0, num, ok = 0, n, tbytes; char *buf = NULL; char *p; BIGNUM *t = NULL; @@ -67,22 +67,22 @@ char *BN_bn2dec(const BIGNUM *a) */ i = BN_num_bits(a) * 3; num = (i / 10 + i / 1000 + 1) + 1; + tbytes = num + 3; /* negative and terminator and one spare? */ bn_data_num = num / BN_DEC_NUM + 1; bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG)); - buf = OPENSSL_malloc(num + 3); - if ((buf == NULL) || (bn_data == NULL)) { + buf = OPENSSL_malloc(tbytes); + if (buf == NULL || bn_data == NULL) { BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE); goto err; } if ((t = BN_dup(a)) == NULL) goto err; -#define BUF_REMAIN (num+3 - (size_t)(p - buf)) p = buf; lp = bn_data; if (BN_is_zero(t)) { - *(p++) = '0'; - *(p++) = '\0'; + *p++ = '0'; + *p++ = '\0'; } else { if (BN_is_negative(t)) *p++ = '-'; @@ -101,14 +101,16 @@ char *BN_bn2dec(const BIGNUM *a) * the last one needs truncation. The blocks need to be reversed in * order. */ - BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp); - while (*p) - p++; + n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp); + if (n < 0) + goto err; + p += n; while (lp != bn_data) { lp--; - BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp); - while (*p) - p++; + n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp); + if (n < 0) + goto err; + p += n; } } ok = 1; @@ -128,28 +130,28 @@ int BN_hex2bn(BIGNUM **bn, const char *a) int neg = 0, h, m, i, j, k, c; int num; - if ((a == NULL) || (*a == '\0')) - return (0); + if (a == NULL || *a == '\0') + return 0; if (*a == '-') { neg = 1; a++; } - for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++) + for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++) continue; - if (i == 0 || i > INT_MAX/4) + if (i == 0 || i > INT_MAX / 4) goto err; num = i + neg; if (bn == NULL) - return (num); + return num; /* a is the start of the hex digits, and it is 'i' long */ if (*bn == NULL) { if ((ret = BN_new()) == NULL) - return (0); + return 0; } else { ret = *bn; BN_zero(ret); @@ -163,7 +165,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a) m = 0; h = 0; while (j > 0) { - m = ((BN_BYTES * 2) <= j) ? (BN_BYTES * 2) : j; + m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j; l = 0; for (;;) { c = a[j - m]; @@ -177,7 +179,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a) break; } } - j -= (BN_BYTES * 2); + j -= BN_BYTES * 2; } ret->top = h; bn_correct_top(ret); @@ -187,11 +189,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a) /* Don't set the negative flag if it's zero. */ if (ret->top != 0) ret->neg = neg; - return (num); + return num; err: if (*bn == NULL) BN_free(ret); - return (0); + return 0; } int BN_dec2bn(BIGNUM **bn, const char *a) @@ -201,22 +203,22 @@ int BN_dec2bn(BIGNUM **bn, const char *a) int neg = 0, i, j; int num; - if ((a == NULL) || (*a == '\0')) - return (0); + if (a == NULL || *a == '\0') + return 0; if (*a == '-') { neg = 1; a++; } - for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++) + for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++) continue; - if (i == 0 || i > INT_MAX/4) + if (i == 0 || i > INT_MAX / 4) goto err; num = i + neg; if (bn == NULL) - return (num); + return num; /* * a is the start of the digits, and it is 'i' long. We chop it into @@ -224,7 +226,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) */ if (*bn == NULL) { if ((ret = BN_new()) == NULL) - return (0); + return 0; } else { ret = *bn; BN_zero(ret); @@ -234,7 +236,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a) if (bn_expand(ret, i * 4) == NULL) goto err; - j = BN_DEC_NUM - (i % BN_DEC_NUM); + j = BN_DEC_NUM - i % BN_DEC_NUM; if (j == BN_DEC_NUM) j = 0; l = 0; @@ -257,11 +259,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a) /* Don't set the negative flag if it's zero. */ if (ret->top != 0) ret->neg = neg; - return (num); + return num; err: if (*bn == NULL) BN_free(ret); - return (0); + return 0; } int BN_asc2bn(BIGNUM **bn, const char *a) @@ -291,11 +293,11 @@ int BN_print_fp(FILE *fp, const BIGNUM *a) int ret; if ((b = BIO_new(BIO_s_file())) == NULL) - return (0); + return 0; BIO_set_fp(b, fp, BIO_NOCLOSE); ret = BN_print(b, a); BIO_free(b); - return (ret); + return ret; } # endif @@ -304,16 +306,16 @@ int BN_print(BIO *bp, const BIGNUM *a) int i, j, v, z = 0; int ret = 0; - if ((a->neg) && (BIO_write(bp, "-", 1) != 1)) + if ((a->neg) && BIO_write(bp, "-", 1) != 1) goto end; - if (BN_is_zero(a) && (BIO_write(bp, "0", 1) != 1)) + if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1) goto end; for (i = a->top - 1; i >= 0; i--) { for (j = BN_BITS2 - 4; j >= 0; j -= 4) { /* strip leading zeros */ - v = ((int)(a->d[i] >> (long)j)) & 0x0f; - if (z || (v != 0)) { - if (BIO_write(bp, &(Hex[v]), 1) != 1) + v = (int)((a->d[i] >> j) & 0x0f); + if (z || v != 0) { + if (BIO_write(bp, &Hex[v], 1) != 1) goto end; z = 1; } @@ -321,7 +323,7 @@ int BN_print(BIO *bp, const BIGNUM *a) } ret = 1; end: - return (ret); + return ret; } char *BN_options(void) @@ -332,12 +334,12 @@ char *BN_options(void) if (!init) { init++; #ifdef BN_LLONG - BIO_snprintf(data, sizeof(data), "bn(%d,%d)", - (int)sizeof(BN_ULLONG) * 8, (int)sizeof(BN_ULONG) * 8); + BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)", + sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8); #else - BIO_snprintf(data, sizeof(data), "bn(%d,%d)", - (int)sizeof(BN_ULONG) * 8, (int)sizeof(BN_ULONG) * 8); + BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)", + sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8); #endif } - return (data); + return data; } diff --git a/deps/openssl/openssl/crypto/bn/bn_rand.c b/deps/openssl/openssl/crypto/bn/bn_rand.c index 9ce4c5f606..c0d1a32292 100644 --- a/deps/openssl/openssl/crypto/bn/bn_rand.c +++ b/deps/openssl/openssl/crypto/bn/bn_rand.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -14,11 +14,14 @@ #include <openssl/rand.h> #include <openssl/sha.h> -static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) +typedef enum bnrand_flag_e { + NORMAL, TESTING, PRIVATE +} BNRAND_FLAG; + +static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom) { unsigned char *buf = NULL; - int ret = 0, bit, bytes, mask; - time_t tim; + int b, ret = 0, bit, bytes, mask; if (bits == 0) { if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY) @@ -40,13 +43,11 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) } /* make a random number and set the top and bottom bits */ - time(&tim); - RAND_add(&tim, sizeof(tim), 0.0); - - if (RAND_bytes(buf, bytes) <= 0) + b = flag == NORMAL ? RAND_bytes(buf, bytes) : RAND_priv_bytes(buf, bytes); + if (b <= 0) goto err; - if (pseudorand == 2) { + if (flag == TESTING) { /* * generate patterns that are more likely to trigger BN library bugs */ @@ -86,7 +87,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) err: OPENSSL_clear_free(buf, bytes); bn_check_top(rnd); - return (ret); + return ret; toosmall: BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL); @@ -95,29 +96,27 @@ toosmall: int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(0, rnd, bits, top, bottom); + return bnrand(NORMAL, rnd, bits, top, bottom); } -int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) +int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(1, rnd, bits, top, bottom); + return bnrand(TESTING, rnd, bits, top, bottom); } -int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom) +int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom) { - return bnrand(2, rnd, bits, top, bottom); + return bnrand(PRIVATE, rnd, bits, top, bottom); } /* random number r: 0 <= r < range */ -static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) +static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range) { - int (*bn_rand) (BIGNUM *, int, int, int) = - pseudo ? BN_pseudo_rand : BN_rand; int n; int count = 100; if (range->neg || BN_is_zero(range)) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE); + BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE); return 0; } @@ -133,8 +132,9 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) * than range */ do { - if (!bn_rand(r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) + if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) return 0; + /* * If r < 3*range, use r := r MOD range (which is either r, r - * range, or r - 2*range). Otherwise, iterate once more. Since @@ -150,7 +150,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) } if (!--count) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS); return 0; } @@ -159,11 +159,11 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) } else { do { /* range = 11..._2 or range = 101..._2 */ - if (!bn_rand(r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) + if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) return 0; if (!--count) { - BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS); + BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS); return 0; } } @@ -176,12 +176,22 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range) int BN_rand_range(BIGNUM *r, const BIGNUM *range) { - return bn_rand_range(0, r, range); + return bnrand_range(NORMAL, r, range); +} + +int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range) +{ + return bnrand_range(PRIVATE, r, range); +} + +int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) +{ + return BN_rand(rnd, bits, top, bottom); } int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) { - return bn_rand_range(1, r, range); + return BN_rand_range(r, range); } /* @@ -229,7 +239,7 @@ int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, memset(private_bytes + todo, 0, sizeof(private_bytes) - todo); for (done = 0; done < num_k_bytes;) { - if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1) + if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1) goto err; SHA512_Init(&sha); SHA512_Update(&sha, &done, sizeof(done)); diff --git a/deps/openssl/openssl/crypto/bn/bn_recp.c b/deps/openssl/openssl/crypto/bn/bn_recp.c index 20585b9d4b..9ab767f42f 100644 --- a/deps/openssl/openssl/crypto/bn/bn_recp.c +++ b/deps/openssl/openssl/crypto/bn/bn_recp.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -21,22 +21,23 @@ BN_RECP_CTX *BN_RECP_CTX_new(void) { BN_RECP_CTX *ret; - if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) - return (NULL); + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) { + BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } bn_init(&(ret->N)); bn_init(&(ret->Nr)); ret->flags = BN_FLG_MALLOCED; - return (ret); + return ret; } void BN_RECP_CTX_free(BN_RECP_CTX *recp) { if (recp == NULL) return; - - BN_free(&(recp->N)); - BN_free(&(recp->Nr)); + BN_free(&recp->N); + BN_free(&recp->Nr); if (recp->flags & BN_FLG_MALLOCED) OPENSSL_free(recp); } @@ -48,7 +49,7 @@ int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) BN_zero(&(recp->Nr)); recp->num_bits = BN_num_bits(d); recp->shift = 0; - return (1); + return 1; } int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, @@ -77,7 +78,7 @@ int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, err: BN_CTX_end(ctx); bn_check_top(r); - return (ret); + return ret; } int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, @@ -87,17 +88,11 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BIGNUM *a, *b, *d, *r; BN_CTX_start(ctx); + d = (dv != NULL) ? dv : BN_CTX_get(ctx); + r = (rem != NULL) ? rem : BN_CTX_get(ctx); a = BN_CTX_get(ctx); b = BN_CTX_get(ctx); - if (dv != NULL) - d = dv; - else - d = BN_CTX_get(ctx); - if (rem != NULL) - r = rem; - else - r = BN_CTX_get(ctx); - if (a == NULL || b == NULL || d == NULL || r == NULL) + if (b == NULL) goto err; if (BN_ucmp(m, &(recp->N)) < 0) { @@ -107,7 +102,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, return 0; } BN_CTX_end(ctx); - return (1); + return 1; } /* @@ -167,7 +162,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, BN_CTX_end(ctx); bn_check_top(dv); bn_check_top(rem); - return (ret); + return ret; } /* @@ -195,5 +190,5 @@ int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) err: bn_check_top(r); BN_CTX_end(ctx); - return (ret); + return ret; } diff --git a/deps/openssl/openssl/crypto/bn/bn_shift.c b/deps/openssl/openssl/crypto/bn/bn_shift.c index 6a1eec80af..15d4b321ba 100644 --- a/deps/openssl/openssl/crypto/bn/bn_shift.c +++ b/deps/openssl/openssl/crypto/bn/bn_shift.c @@ -21,11 +21,11 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) if (r != a) { r->neg = a->neg; if (bn_wexpand(r, a->top + 1) == NULL) - return (0); + return 0; r->top = a->top; } else { if (bn_wexpand(r, a->top + 1) == NULL) - return (0); + return 0; } ap = a->d; rp = r->d; @@ -40,7 +40,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) r->top++; } bn_check_top(r); - return (1); + return 1; } int BN_rshift1(BIGNUM *r, const BIGNUM *a) @@ -53,14 +53,14 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) if (BN_is_zero(a)) { BN_zero(r); - return (1); + return 1; } i = a->top; ap = a->d; j = i - (ap[i - 1] == 1); if (a != r) { if (bn_wexpand(r, j) == NULL) - return (0); + return 0; r->neg = a->neg; } rp = r->d; @@ -77,7 +77,7 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) if (!r->top) r->neg = 0; /* don't allow negative zero */ bn_check_top(r); - return (1); + return 1; } int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) @@ -96,7 +96,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) nw = n / BN_BITS2; if (bn_wexpand(r, a->top + nw + 1) == NULL) - return (0); + return 0; r->neg = a->neg; lb = n % BN_BITS2; rb = BN_BITS2 - lb; @@ -116,7 +116,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) r->top = a->top + nw + 1; bn_correct_top(r); bn_check_top(r); - return (1); + return 1; } int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) @@ -138,12 +138,12 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) lb = BN_BITS2 - rb; if (nw >= a->top || a->top == 0) { BN_zero(r); - return (1); + return 1; } i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2; if (r != a) { if (bn_wexpand(r, i) == NULL) - return (0); + return 0; r->neg = a->neg; } else { if (n == 0) @@ -171,5 +171,5 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) if (!r->top) r->neg = 0; /* don't allow negative zero */ bn_check_top(r); - return (1); + return 1; } diff --git a/deps/openssl/openssl/crypto/bn/bn_sqr.c b/deps/openssl/openssl/crypto/bn/bn_sqr.c index db72bf28a6..0c0a590f0c 100644 --- a/deps/openssl/openssl/crypto/bn/bn_sqr.c +++ b/deps/openssl/openssl/crypto/bn/bn_sqr.c @@ -42,7 +42,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) BN_CTX_start(ctx); rr = (a != r) ? r : BN_CTX_get(ctx); tmp = BN_CTX_get(ctx); - if (!rr || !tmp) + if (rr == NULL || tmp == NULL) goto err; max = 2 * al; /* Non-zero (from above) */ @@ -102,7 +102,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) bn_check_top(rr); bn_check_top(tmp); BN_CTX_end(ctx); - return (ret); + return ret; } /* tmp must have 2*n words */ diff --git a/deps/openssl/openssl/crypto/bn/bn_sqrt.c b/deps/openssl/openssl/crypto/bn/bn_sqrt.c index 84376c78e5..b97d8ca43b 100644 --- a/deps/openssl/openssl/crypto/bn/bn_sqrt.c +++ b/deps/openssl/openssl/crypto/bn/bn_sqrt.c @@ -1,5 +1,5 @@ /* - * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -39,7 +39,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME); - return (NULL); + return NULL; } if (BN_is_zero(a) || BN_is_one(a)) { @@ -179,7 +179,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) if (!BN_set_word(y, i)) goto end; } else { - if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) + if (!BN_priv_rand(y, BN_num_bits(p), 0, 0)) goto end; if (BN_ucmp(y, p) >= 0) { if (!(p->neg ? BN_add : BN_sub) (y, y, p)) diff --git a/deps/openssl/openssl/crypto/bn/bn_srp.c b/deps/openssl/openssl/crypto/bn/bn_srp.c index 58b1691eee..27b6ebe518 100644 --- a/deps/openssl/openssl/crypto/bn/bn_srp.c +++ b/deps/openssl/openssl/crypto/bn/bn_srp.c @@ -1,5 +1,5 @@ /* - * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -8,12 +8,12 @@ */ #include "bn_lcl.h" -#include "e_os.h" +#include "internal/nelem.h" #ifndef OPENSSL_NO_SRP #include <openssl/srp.h> -#include <internal/bn_srp.h> +#include "internal/bn_srp.h" # if (BN_BYTES == 8) # if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) diff --git a/deps/openssl/openssl/crypto/bn/bn_word.c b/deps/openssl/openssl/crypto/bn/bn_word.c index 1af13a53fb..262d7668fc 100644 --- a/deps/openssl/openssl/crypto/bn/bn_word.c +++ b/deps/openssl/openssl/crypto/bn/bn_word.c @@ -55,7 +55,7 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) (BN_ULLONG) w); #endif } - return ((BN_ULONG)ret); + return (BN_ULONG)ret; } BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) @@ -92,7 +92,7 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) if (!a->top) a->neg = 0; /* don't allow negative zero */ bn_check_top(a); - return (ret); + return ret; } int BN_add_word(BIGNUM *a, BN_ULONG w) @@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) i = BN_sub_word(a, w); if (!BN_is_zero(a)) a->neg = !(a->neg); - return (i); + return i; } for (i = 0; w != 0 && i < a->top; i++) { a->d[i] = l = (a->d[i] + w) & BN_MASK2; @@ -128,7 +128,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) a->d[i] = w; } bn_check_top(a); - return (1); + return 1; } int BN_sub_word(BIGNUM *a, BN_ULONG w) @@ -153,13 +153,13 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) a->neg = 0; i = BN_add_word(a, w); a->neg = 1; - return (i); + return i; } if ((a->top == 1) && (a->d[0] < w)) { a->d[0] = w - a->d[0]; a->neg = 1; - return (1); + return 1; } i = 0; for (;;) { @@ -175,7 +175,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w) if ((a->d[i] == 0) && (i == (a->top - 1))) a->top--; bn_check_top(a); - return (1); + return 1; } int BN_mul_word(BIGNUM *a, BN_ULONG w) @@ -191,11 +191,11 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w) ll = bn_mul_words(a->d, a->d, a->top, w); if (ll) { if (bn_wexpand(a, a->top + 1) == NULL) - return (0); + return 0; a->d[a->top++] = ll; } } } bn_check_top(a); - return (1); + return 1; } diff --git a/deps/openssl/openssl/crypto/bn/bn_x931p.c b/deps/openssl/openssl/crypto/bn/bn_x931p.c index d01f12cadc..9eb8384fde 100644 --- a/deps/openssl/openssl/crypto/bn/bn_x931p.c +++ b/deps/openssl/openssl/crypto/bn/bn_x931p.c @@ -62,10 +62,10 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, return 0; BN_CTX_start(ctx); - if (!p1) + if (p1 == NULL) p1 = BN_CTX_get(ctx); - if (!p2) + if (p2 == NULL) p2 = BN_CTX_get(ctx); t = BN_CTX_get(ctx); @@ -173,7 +173,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) * - 1. By setting the top two bits we ensure that the lower bound is * exceeded. */ - if (!BN_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) + if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) goto err; BN_CTX_start(ctx); @@ -182,7 +182,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) goto err; for (i = 0; i < 1000; i++) { - if (!BN_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) + if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) goto err; /* Check that |Xp - Xq| > 2^(nbits - 100) */ @@ -227,9 +227,9 @@ int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, if (Xp1 == NULL || Xp2 == NULL) goto error; - if (!BN_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) + if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto error; - if (!BN_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) + if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) goto error; if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb)) goto error; diff --git a/deps/openssl/openssl/crypto/bn/build.info b/deps/openssl/openssl/crypto/bn/build.info index c608ecce82..a463eddabb 100644 --- a/deps/openssl/openssl/crypto/bn/build.info +++ b/deps/openssl/openssl/crypto/bn/build.info @@ -11,16 +11,16 @@ INCLUDE[../../libcrypto]=../../crypto/include INCLUDE[bn_exp.o]=.. GENERATE[bn-586.s]=asm/bn-586.pl \ - $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR) + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) DEPEND[bn-586.s]=../perlasm/x86asm.pl GENERATE[co-586.s]=asm/co-586.pl \ - $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR) + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) DEPEND[co-586.s]=../perlasm/x86asm.pl GENERATE[x86-mont.s]=asm/x86-mont.pl \ - $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR) + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) DEPEND[x86-mont.s]=../perlasm/x86asm.pl GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \ - $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR) + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME) @@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=.. GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME) INCLUDE[sparcv9-gf2m.o]=.. -GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME) -GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME) +GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME) +INCLUDE[bn-mips.o]=.. +GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME) +INCLUDE[mips-mont.o]=.. GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME) GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME) @@ -47,7 +49,7 @@ GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME) GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME) GENERATE[bn-ia64.s]=asm/ia64.S -GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(CFLAGS) $(LIB_CFLAGS) +GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS) GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME) @@ -63,22 +65,3 @@ INCLUDE[armv4-mont.o]=.. GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME) INCLUDE[armv4-gf2m.o]=.. GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME) - -OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c -BEGINRAW[Makefile] -##### BN assembler implementations - -{- $builddir -}/bn-mips3.o: {- $sourcedir -}/asm/mips3.s - @if [ "$(CC)" = "gcc" ]; then \ - ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \ - as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \ - else $(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi - -# GNU assembler fails to compile PA-RISC2 modules, insist on calling -# vendor assembler... -{- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s - CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2W.o {- $sourcedir -}/asm/pa-risc2W.s -{- $builddir -}/pa-risc2.o: {- $sourcedir -}/asm/pa-risc2.s - CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2.o {- $sourcedir -}/asm/pa-risc2.s - -ENDRAW[Makefile] diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.c b/deps/openssl/openssl/crypto/bn/rsaz_exp.c index 1a70f6cade..22455b8a63 100644 --- a/deps/openssl/openssl/crypto/bn/rsaz_exp.c +++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.c @@ -1,54 +1,17 @@ /* * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2012, Intel Corporation. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at * https://www.openssl.org/source/license.html + * + * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) + * (1) Intel Corporation, Israel Development Center, Haifa, Israel + * (2) University of Haifa, Israel */ -/***************************************************************************** -* * -* Copyright (c) 2012, Intel Corporation * -* * -* All rights reserved. * -* * -* Redistribution and use in source and binary forms, with or without * -* modification, are permitted provided that the following conditions are * -* met: * -* * -* * Redistributions of source code must retain the above copyright * -* notice, this list of conditions and the following disclaimer. * -* * -* * Redistributions in binary form must reproduce the above copyright * -* notice, this list of conditions and the following disclaimer in the * -* documentation and/or other materials provided with the * -* distribution. * -* * -* * Neither the name of the Intel Corporation nor the names of its * -* contributors may be used to endorse or promote products derived from * -* this software without specific prior written permission. * -* * -* * -* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * -* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * -* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * -* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * -* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * -* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * -* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * -* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * -* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * -* * -****************************************************************************** -* Developers and authors: * -* Shay Gueron (1, 2), and Vlad Krasnov (1) * -* (1) Intel Corporation, Israel Development Center, Haifa, Israel * -* (2) University of Haifa, Israel * -*****************************************************************************/ - #include <openssl/opensslconf.h> #include "rsaz_exp.h" diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.h b/deps/openssl/openssl/crypto/bn/rsaz_exp.h index 9501cc8089..c5864f8aaa 100644 --- a/deps/openssl/openssl/crypto/bn/rsaz_exp.h +++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.h @@ -1,54 +1,17 @@ /* - * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2012, Intel Corporation. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at * https://www.openssl.org/source/license.html + * + * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) + * (1) Intel Corporation, Israel Development Center, Haifa, Israel + * (2) University of Haifa, Israel */ -/***************************************************************************** -* * -* Copyright (c) 2012, Intel Corporation * -* * -* All rights reserved. * -* * -* Redistribution and use in source and binary forms, with or without * -* modification, are permitted provided that the following conditions are * -* met: * -* * -* * Redistributions of source code must retain the above copyright * -* notice, this list of conditions and the following disclaimer. * -* * -* * Redistributions in binary form must reproduce the above copyright * -* notice, this list of conditions and the following disclaimer in the * -* documentation and/or other materials provided with the * -* distribution. * -* * -* * Neither the name of the Intel Corporation nor the names of its * -* contributors may be used to endorse or promote products derived from * -* this software without specific prior written permission. * -* * -* * -* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * -* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * -* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * -* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * -* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * -* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * -* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * -* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * -* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * -* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * -* * -****************************************************************************** -* Developers and authors: * -* Shay Gueron (1, 2), and Vlad Krasnov (1) * -* (1) Intel Corporation, Israel Development Center, Haifa, Israel * -* (2) University of Haifa, Israel * -*****************************************************************************/ - #ifndef RSAZ_EXP_H # define RSAZ_EXP_H @@ -65,7 +28,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG exponent[16], const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0); -int rsaz_avx2_eligible(); +int rsaz_avx2_eligible(void); void RSAZ_512_mod_exp(BN_ULONG result[8], const BN_ULONG base_norm[8], const BN_ULONG exponent[8], |