67 files changed, 3970 insertions, 5742 deletions
diff --git a/deps/openssl/openssl/crypto/bn/README.pod b/deps/openssl/openssl/crypto/bn/README.pod
index 109ab0d914..706a140342 100644
--- a/deps/openssl/openssl/crypto/bn/README.pod
+++ b/deps/openssl/openssl/crypto/bn/README.pod
@@ -6,7 +6,7 @@ bn_mul_words, bn_mul_add_words, bn_sqr_words, bn_div_words,
 bn_add_words, bn_sub_words, bn_mul_comba4, bn_mul_comba8,
 bn_sqr_comba4, bn_sqr_comba8, bn_cmp_words, bn_mul_normal,
 bn_mul_low_normal, bn_mul_recursive, bn_mul_part_recursive,
-bn_mul_low_recursive, bn_mul_high, bn_sqr_normal, bn_sqr_recursive,
+bn_mul_low_recursive, bn_sqr_normal, bn_sqr_recursive,
 bn_expand, bn_wexpand, bn_expand2, bn_fix_top, bn_check_top,
 bn_print, bn_dump, bn_set_max, bn_set_high, bn_set_low - BIGNUM
 library internal functions
@@ -41,8 +41,6 @@ library internal functions
    int n, int tna, int tnb, BN_ULONG *tmp);
  void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
    int n2, BN_ULONG *tmp);
- void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l,
-   int n2, BN_ULONG *tmp);
 
  void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
  void bn_sqr_recursive(BN_ULONG *r, BN_ULONG *a, int n2, BN_ULONG *tmp);
@@ -178,10 +176,6 @@ bn_mul_low_recursive(B<r>, B<a>, B<b>, B<n2>, B<tmp>) operates on the
 B<n2> word arrays B<r> and B<tmp> and the B<n2>/2 word arrays B<a>
 and B<b>.
 
-bn_mul_high(B<r>, B<a>, B<b>, B<l>, B<n2>, B<tmp>) operates on the
-B<n2> word arrays B<r>, B<a>, B<b> and B<l> (?) and the 3*B<n2> word
-array B<tmp>.
-
 BN_mul() calls bn_mul_normal(), or an optimized implementation if the
 factors have the same size: bn_mul_comba8() is used if they are 8
 words long, bn_mul_recursive() if they are larger than
diff --git a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
index 9632133090..c9b962a150 100644
--- a/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/alpha-mont.pl
@@ -8,7 +8,7 @@
 
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
index 0bb5433075..7a0cdb2e8a 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl
@@ -36,7 +36,7 @@
 #
 # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
 # Polynomial Multiplication on ARM Processors using the NEON Engine.
-# 
+#
 # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
 
 $flavour = shift;
diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
index ddee8b7fa1..6bedc62ba6 100644
--- a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl
@@ -23,7 +23,7 @@
 # [depending on key length, less for longer keys] on ARM920T, and
 # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
 # base and compiler generated code with in-lined umull and even umlal
-# instructions. The latter means that this code didn't really have an 
+# instructions. The latter means that this code didn't really have an
 # "advantage" of utilizing some "secret" instruction.
 #
 # The code is interoperable with Thumb ISA and is rather compact, less
diff --git a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
index 1ca1bbf7d4..58effc8808 100644
--- a/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/bn-586.pl
@@ -14,7 +14,7 @@ require "x86asm.pl";
 $output = pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],$0);
+&asm_init($ARGV[0]);
 
 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -54,7 +54,7 @@ sub bn_mul_add_words
 		&movd("mm0",&wparam(3));	# mm0 = w
 		&pxor("mm1","mm1");		# mm1 = carry_in
 		&jmp(&label("maw_sse2_entry"));
-		
+
 	&set_label("maw_sse2_unrolled",16);
 		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
 		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
@@ -675,20 +675,20 @@ sub bn_sub_part_words
 	    &adc($c,0);
 	    &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
 	}
-	    
+
 	&comment("");
 	&add($b,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_neg_loop"));
-	    
+
 	&set_label("pw_neg_finish",0);
 	&mov($tmp2,&wparam(4));	# get dl
 	&mov($num,0);
 	&sub($num,$tmp2);
 	&and($num,7);
 	&jz(&label("pw_end"));
-	    
+
 	for ($i=0; $i<7; $i++)
 	{
 	    &comment("dl<0 Tail Round $i");
@@ -705,9 +705,9 @@ sub bn_sub_part_words
 	}
 
 	&jmp(&label("pw_end"));
-	
+
 	&set_label("pw_pos",0);
-	
+
 	&and($num,0xfffffff8);	# num / 8
 	&jz(&label("pw_pos_finish"));
 
@@ -722,18 +722,18 @@ sub bn_sub_part_words
 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
 	    &jnc(&label("pw_nc".$i));
 	}
-	    
+
 	&comment("");
 	&add($a,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_pos_loop"));
-	    
+
 	&set_label("pw_pos_finish",0);
 	&mov($num,&wparam(4));	# get dl
 	&and($num,7);
 	&jz(&label("pw_end"));
-	    
+
 	for ($i=0; $i<7; $i++)
 	{
 	    &comment("dl>0 Tail Round $i");
@@ -754,17 +754,17 @@ sub bn_sub_part_words
 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
 	    &set_label("pw_nc".$i,0);
 	}
-	    
+
 	&comment("");
 	&add($a,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_nc_loop"));
-	    
+
 	&mov($num,&wparam(4));	# get dl
 	&and($num,7);
 	&jz(&label("pw_nc_end"));
-	    
+
 	for ($i=0; $i<7; $i++)
 	{
 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
diff --git a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
index c0e5400807..9c46da3af8 100644
--- a/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/c64xplus-gf2m.pl
@@ -43,7 +43,7 @@ $code.=<<___;
 	SHRU	$A,16,   $Ahi		; smash $A to two halfwords
 ||	EXTU	$A,16,16,$Alo
 
-	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits muliplication
+	XORMPY	$Alo,$B_2,$Alox2	; 16x8 bits multiplication
 ||	XORMPY	$Ahi,$B_2,$Ahix2
 ||	EXTU	$B,16,24,$B_1
 	XORMPY	$Alo,$B_0,$Alox0
diff --git a/deps/openssl/openssl/crypto/bn/asm/co-586.pl b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
index 60d0363660..97f5e3a19f 100644
--- a/deps/openssl/openssl/crypto/bn/asm/co-586.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/co-586.pl
@@ -13,7 +13,7 @@ require "x86asm.pl";
 $output = pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],$0);
+&asm_init($ARGV[0]);
 
 &bn_mul_comba("bn_mul_comba8",8);
 &bn_mul_comba("bn_mul_comba4",4);
@@ -47,7 +47,7 @@ sub mul_add_c
 	 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;	# laod next b
 	 ###
 	&adc($c2,0);
-	 # is pos > 1, it means it is the last loop 
+	 # is pos > 1, it means it is the last loop
 	 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;		# save r[];
 	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# laod next a
 	}
@@ -76,7 +76,7 @@ sub sqr_add_c
 	 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
 	 ###
 	&adc($c2,0);
-	 # is pos > 1, it means it is the last loop 
+	 # is pos > 1, it means it is the last loop
 	 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;		# save r[];
 	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# load next b
 	}
@@ -127,7 +127,7 @@ sub bn_mul_comba
 	$c2="ebp";
 	$a="esi";
 	$b="edi";
-	
+
 	$as=0;
 	$ae=0;
 	$bs=0;
@@ -142,9 +142,9 @@ sub bn_mul_comba
 	 &push("ebx");
 
 	&xor($c0,$c0);
-	 &mov("eax",&DWP(0,$a,"",0));	# load the first word 
+	 &mov("eax",&DWP(0,$a,"",0));	# load the first word
 	&xor($c1,$c1);
-	 &mov("edx",&DWP(0,$b,"",0));	# load the first second 
+	 &mov("edx",&DWP(0,$b,"",0));	# load the first second
 
 	for ($i=0; $i<$tot; $i++)
 		{
@@ -152,7 +152,7 @@ sub bn_mul_comba
 		$bi=$bs;
 		$end=$be+1;
 
-		&comment("################## Calculate word $i"); 
+		&comment("################## Calculate word $i");
 
 		for ($j=$bs; $j<$end; $j++)
 			{
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
index 0df1fad115..ec486f7779 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64-mont.pl
@@ -8,7 +8,7 @@
 
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -80,7 +80,7 @@ $code=<<___;
 
 // int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
 //		    const BN_ULONG *bp,const BN_ULONG *np,
-//		    const BN_ULONG *n0p,int num);			
+//		    const BN_ULONG *n0p,int num);
 .align	64
 .global	bn_mul_mont#
 .proc	bn_mul_mont#
@@ -203,7 +203,7 @@ bn_mul_mont_general:
 { .mmi;	.pred.rel	"mutex",p39,p41
 (p39)	add		topbit=r0,r0
 (p41)	add		topbit=r0,r0,1
-	nop.i		0		}	
+	nop.i		0		}
 { .mmi;	st8		[tp_1]=n[0]
 	add		tptr=16,sp
 	add		tp_1=8,sp	};;
diff --git a/deps/openssl/openssl/crypto/bn/asm/ia64.S b/deps/openssl/openssl/crypto/bn/asm/ia64.S
index f2404a3c1e..d235c45e2d 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ia64.S
+++ b/deps/openssl/openssl/crypto/bn/asm/ia64.S
@@ -1,9 +1,9 @@
 .explicit
 .text
 .ident	"ia64.S, Version 2.1"
-.ident	"IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident	"IA-64 ISA artwork by Andy Polyakov <appro@openssl.org>"
 
-// Copyright 2001-2016 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved.
 //
 // Licensed under the OpenSSL license (the "License").  You may not use
 // this file except in compliance with the License.  You can obtain a copy
@@ -12,7 +12,7 @@
 
 //
 // ====================================================================
-// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 // project.
 //
 // Rights for redistribution and usage in source and binary forms are
@@ -20,7 +20,7 @@
 // disclaimed.
 // ====================================================================
 //
-// Version 2.x is Itanium2 re-tune. Few words about how Itanum2 is
+// Version 2.x is Itanium2 re-tune. Few words about how Itanium2 is
 // different from Itanium to this module viewpoint. Most notably, is it
 // "wider" than Itanium? Can you experience loop scalability as
 // discussed in commentary sections? Not really:-( Itanium2 has 6
@@ -141,7 +141,7 @@
 //	User Mask I want to excuse the kernel from preserving upper
 //	(f32-f128) FP register bank over process context switch, thus
 //	minimizing bus bandwidth consumption during the switch (i.e.
-//	after PKI opration completes and the program is off doing
+//	after PKI operation completes and the program is off doing
 //	something else like bulk symmetric encryption). Having said
 //	this, I also want to point out that it might be good idea
 //	to compile the whole toolkit (as well as majority of the
@@ -157,12 +157,15 @@
 #else
 #define	ADDP	add
 #endif
+#ifdef __VMS
+.alias abort, "decc$abort"
+#endif
 
 #if 1
 //
 // bn_[add|sub]_words routines.
 //
-// Loops are spinning in 2*(n+5) ticks on Itanuim (provided that the
+// Loops are spinning in 2*(n+5) ticks on Itanium (provided that the
 // data reside in L1 cache, i.e. 2 ticks away). It's possible to
 // compress the epilogue and get down to 2*n+6, but at the cost of
 // scalability (the neat feature of this implementation is that it
@@ -500,7 +503,7 @@ bn_sqr_words:
 // possible to compress the epilogue (I'm getting tired to write this
 // comment over and over) and get down to 2*n+16 at the cost of
 // scalability. The decision will very likely be reconsidered after the
-// benchmark program is profiled. I.e. if perfomance gain on Itanium
+// benchmark program is profiled. I.e. if performance gain on Itanium
 // will appear larger than loss on "wider" IA-64, then the loop should
 // be explicitly split and the epilogue compressed.
 .L_bn_sqr_words_ctop:
@@ -936,7 +939,7 @@ bn_mul_comba8:
 		xma.hu	f118=f39,f127,f117	}
 { .mfi;		xma.lu	f117=f39,f127,f117	};;//
 //-------------------------------------------------//
-// Leaving muliplier's heaven... Quite a ride, huh?
+// Leaving multiplier's heaven... Quite a ride, huh?
 
 { .mii;	getf.sig	r31=f47
 	add		r25=r25,r24
@@ -1428,6 +1431,7 @@ bn_div_words:
 	mov		ar.ec=0		// don't rotate at exit
 	mov		pr.rot=0	}
 { .mii;	mov		L=r33		// save l
+	mov		r25=r0		// needed if abort is called on VMS
 	mov		r36=r0		};;
 
 .L_divw_shift:	// -vv- note signed comparison
@@ -1529,9 +1533,8 @@ bn_div_words:
 // output:	f8 = (int)(a/b)
 // clobbered:	f8,f9,f10,f11,pred
 pred=p15
-// One can argue that this snippet is copyrighted to Intel
-// Corporation, as it's essentially identical to one of those
-// found in "Divide, Square Root and Remainder" section at
+// This snippet is based on text found in the "Divide, Square
+// Root and Remainder" section at
 // http://www.intel.com/software/products/opensource/libraries/num.htm.
 // Yes, I admit that the referred code was used as template,
 // but after I realized that there hardly is any other instruction
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
index e141e1a925..fbe5d04f71 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl
@@ -56,14 +56,14 @@
 $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
-	$PTR_ADD="dadd";	# incidentally works even on n32
-	$PTR_SUB="dsub";	# incidentally works even on n32
+	$PTR_ADD="daddu";	# incidentally works even on n32
+	$PTR_SUB="dsubu";	# incidentally works even on n32
 	$REG_S="sd";
 	$REG_L="ld";
 	$SZREG=8;
 } else {
-	$PTR_ADD="add";
-	$PTR_SUB="sub";
+	$PTR_ADD="addu";
+	$PTR_SUB="subu";
 	$REG_S="sw";
 	$REG_L="lw";
 	$SZREG=4;
@@ -121,6 +121,8 @@ $m1=$s11;
 $FRAMESIZE=14;
 
 $code=<<___;
+#include "mips_arch.h"
+
 .text
 
 .set	noat
@@ -183,27 +185,27 @@ $code.=<<___;
 	$PTR_SUB $sp,$num
 	and	$sp,$at
 
-	$MULTU	$aj,$bi
-	$LD	$alo,$BNSZ($ap)
-	$LD	$nlo,$BNSZ($np)
-	mflo	$lo0
-	mfhi	$hi0
-	$MULTU	$lo0,$n0
-	mflo	$m1
-
-	$MULTU	$alo,$bi
-	mflo	$alo
-	mfhi	$ahi
-
-	$MULTU	$nj,$m1
-	mflo	$lo1
-	mfhi	$hi1
-	$MULTU	$nlo,$m1
+	$MULTU	($aj,$bi)
+	$LD	$ahi,$BNSZ($ap)
+	$LD	$nhi,$BNSZ($np)
+	mflo	($lo0,$aj,$bi)
+	mfhi	($hi0,$aj,$bi)
+	$MULTU	($lo0,$n0)
+	mflo	($m1,$lo0,$n0)
+
+	$MULTU	($ahi,$bi)
+	mflo	($alo,$ahi,$bi)
+	mfhi	($ahi,$ahi,$bi)
+
+	$MULTU	($nj,$m1)
+	mflo	($lo1,$nj,$m1)
+	mfhi	($hi1,$nj,$m1)
+	$MULTU	($nhi,$m1)
 	$ADDU	$lo1,$lo0
 	sltu	$at,$lo1,$lo0
 	$ADDU	$hi1,$at
-	mflo	$nlo
-	mfhi	$nhi
+	mflo	($nlo,$nhi,$m1)
+	mfhi	($nhi,$nhi,$m1)
 
 	move	$tp,$sp
 	li	$j,2*$BNSZ
@@ -215,25 +217,25 @@ $code.=<<___;
 	$LD	$aj,($aj)
 	$LD	$nj,($nj)
 
-	$MULTU	$aj,$bi
+	$MULTU	($aj,$bi)
 	$ADDU	$lo0,$alo,$hi0
 	$ADDU	$lo1,$nlo,$hi1
 	sltu	$at,$lo0,$hi0
 	sltu	$t0,$lo1,$hi1
 	$ADDU	$hi0,$ahi,$at
 	$ADDU	$hi1,$nhi,$t0
-	mflo	$alo
-	mfhi	$ahi
+	mflo	($alo,$aj,$bi)
+	mfhi	($ahi,$aj,$bi)
 
 	$ADDU	$lo1,$lo0
 	sltu	$at,$lo1,$lo0
-	$MULTU	$nj,$m1
+	$MULTU	($nj,$m1)
 	$ADDU	$hi1,$at
 	addu	$j,$BNSZ
 	$ST	$lo1,($tp)
 	sltu	$t0,$j,$num
-	mflo	$nlo
-	mfhi	$nhi
+	mflo	($nlo,$nj,$m1)
+	mfhi	($nhi,$nj,$m1)
 
 	bnez	$t0,.L1st
 	$PTR_ADD $tp,$BNSZ
@@ -263,34 +265,34 @@ $code.=<<___;
 	$PTR_ADD $bi,$bp,$i
 	$LD	$bi,($bi)
 	$LD	$aj,($ap)
-	$LD	$alo,$BNSZ($ap)
+	$LD	$ahi,$BNSZ($ap)
 	$LD	$tj,($sp)
 
-	$MULTU	$aj,$bi
+	$MULTU	($aj,$bi)
 	$LD	$nj,($np)
-	$LD	$nlo,$BNSZ($np)
-	mflo	$lo0
-	mfhi	$hi0
+	$LD	$nhi,$BNSZ($np)
+	mflo	($lo0,$aj,$bi)
+	mfhi	($hi0,$aj,$bi)
 	$ADDU	$lo0,$tj
-	$MULTU	$lo0,$n0
+	$MULTU	($lo0,$n0)
 	sltu	$at,$lo0,$tj
 	$ADDU	$hi0,$at
-	mflo	$m1
+	mflo	($m1,$lo0,$n0)
 
-	$MULTU	$alo,$bi
-	mflo	$alo
-	mfhi	$ahi
+	$MULTU	($ahi,$bi)
+	mflo	($alo,$ahi,$bi)
+	mfhi	($ahi,$ahi,$bi)
 
-	$MULTU	$nj,$m1
-	mflo	$lo1
-	mfhi	$hi1
+	$MULTU	($nj,$m1)
+	mflo	($lo1,$nj,$m1)
+	mfhi	($hi1,$nj,$m1)
 
-	$MULTU	$nlo,$m1
+	$MULTU	($nhi,$m1)
 	$ADDU	$lo1,$lo0
 	sltu	$at,$lo1,$lo0
 	$ADDU	$hi1,$at
-	mflo	$nlo
-	mfhi	$nhi
+	mflo	($nlo,$nhi,$m1)
+	mfhi	($nhi,$nhi,$m1)
 
 	move	$tp,$sp
 	li	$j,2*$BNSZ
@@ -303,19 +305,19 @@ $code.=<<___;
 	$LD	$aj,($aj)
 	$LD	$nj,($nj)
 
-	$MULTU	$aj,$bi
+	$MULTU	($aj,$bi)
 	$ADDU	$lo0,$alo,$hi0
 	$ADDU	$lo1,$nlo,$hi1
 	sltu	$at,$lo0,$hi0
 	sltu	$t0,$lo1,$hi1
 	$ADDU	$hi0,$ahi,$at
 	$ADDU	$hi1,$nhi,$t0
-	mflo	$alo
-	mfhi	$ahi
+	mflo	($alo,$aj,$bi)
+	mfhi	($ahi,$aj,$bi)
 
 	$ADDU	$lo0,$tj
 	addu	$j,$BNSZ
-	$MULTU	$nj,$m1
+	$MULTU	($nj,$m1)
 	sltu	$at,$lo0,$tj
 	$ADDU	$lo1,$lo0
 	$ADDU	$hi0,$at
@@ -323,8 +325,8 @@ $code.=<<___;
 	$LD	$tj,2*$BNSZ($tp)
 	$ADDU	$hi1,$t0
 	sltu	$at,$j,$num
-	mflo	$nlo
-	mfhi	$nhi
+	mflo	($nlo,$nj,$m1)
+	mfhi	($nhi,$nj,$m1)
 	$ST	$lo1,($tp)
 	bnez	$at,.Linner
 	$PTR_ADD $tp,$BNSZ
diff --git a/deps/openssl/openssl/crypto/bn/asm/mips.pl b/deps/openssl/openssl/crypto/bn/asm/mips.pl
index 420f01f3a4..da35ec1b30 100644
--- a/deps/openssl/openssl/crypto/bn/asm/mips.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/mips.pl
@@ -8,7 +8,7 @@
 
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project.
 #
 # Rights for redistribution and usage in source and binary forms are
@@ -42,7 +42,7 @@
 # Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
 # goes way over 3 times faster!
 #
-#					<appro@fy.chalmers.se>
+#					<appro@openssl.org>
 
 # October 2010
 #
@@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i);
 $minus4=$v1;
 
 $code.=<<___;
+#include "mips_arch.h"
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define ddivu(rs,rt)
+# define mfqt(rd,rs,rt)	ddivu	rd,rs,rt
+# define mfrm(rd,rs,rt)	dmodu	rd,rs,rt
+#elif defined(_MIPS_ARCH_MIPS32R6)
+# define divu(rs,rt)
+# define mfqt(rd,rs,rt)	divu	rd,rs,rt
+# define mfrm(rd,rs,rt)	modu	rd,rs,rt
+#else
+# define $DIVU(rs,rt)	$DIVU	$zero,rs,rt
+# define mfqt(rd,rs,rt)	mflo	rd
+# define mfrm(rd,rs,rt)	mfhi	rd
+#endif
+
 .rdata
 .asciiz	"mips3.s, Version 1.2"
 .asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
@@ -151,7 +167,7 @@ $code.=<<___;
 
 .L_bn_mul_add_words_loop:
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	$LD	$t1,0($a0)
 	$LD	$t2,$BNSZ($a1)
 	$LD	$t3,$BNSZ($a0)
@@ -161,11 +177,11 @@ $code.=<<___;
 	sltu	$v0,$t1,$v0	# All manuals say it "compares 32-bit
 				# values", but it seems to work fine
 				# even on 64-bit registers.
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$t1,$at
 	$ADDU	$v0,$t0
-	 $MULTU	$t2,$a3
+	 $MULTU	($t2,$a3)
 	sltu	$at,$t1,$at
 	$ST	$t1,0($a0)
 	$ADDU	$v0,$at
@@ -174,11 +190,11 @@ $code.=<<___;
 	$LD	$ta3,3*$BNSZ($a0)
 	$ADDU	$t3,$v0
 	sltu	$v0,$t3,$v0
-	mflo	$at
-	mfhi	$t2
+	mflo	($at,$t2,$a3)
+	mfhi	($t2,$t2,$a3)
 	$ADDU	$t3,$at
 	$ADDU	$v0,$t2
-	 $MULTU	$ta0,$a3
+	 $MULTU	($ta0,$a3)
 	sltu	$at,$t3,$at
 	$ST	$t3,$BNSZ($a0)
 	$ADDU	$v0,$at
@@ -188,11 +204,11 @@ $code.=<<___;
 	$PTR_ADD $a1,4*$BNSZ
 	$ADDU	$ta1,$v0
 	sltu	$v0,$ta1,$v0
-	mflo	$at
-	mfhi	$ta0
+	mflo	($at,$ta0,$a3)
+	mfhi	($ta0,$ta0,$a3)
 	$ADDU	$ta1,$at
 	$ADDU	$v0,$ta0
-	 $MULTU	$ta2,$a3
+	 $MULTU	($ta2,$a3)
 	sltu	$at,$ta1,$at
 	$ST	$ta1,-2*$BNSZ($a0)
 	$ADDU	$v0,$at
@@ -201,8 +217,8 @@ $code.=<<___;
 	and	$ta0,$a2,$minus4
 	$ADDU	$ta3,$v0
 	sltu	$v0,$ta3,$v0
-	mflo	$at
-	mfhi	$ta2
+	mflo	($at,$ta2,$a3)
+	mfhi	($ta2,$ta2,$a3)
 	$ADDU	$ta3,$at
 	$ADDU	$v0,$ta2
 	sltu	$at,$ta3,$at
@@ -217,13 +233,13 @@ $code.=<<___;
 .L_bn_mul_add_words_tail:
 	.set	reorder
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	$LD	$t1,0($a0)
 	subu	$a2,1
 	$ADDU	$t1,$v0
 	sltu	$v0,$t1,$v0
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$t1,$at
 	$ADDU	$v0,$t0
 	sltu	$at,$t1,$at
@@ -232,13 +248,13 @@ $code.=<<___;
 	beqz	$a2,.L_bn_mul_add_words_return
 
 	$LD	$t0,$BNSZ($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	$LD	$t1,$BNSZ($a0)
 	subu	$a2,1
 	$ADDU	$t1,$v0
 	sltu	$v0,$t1,$v0
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$t1,$at
 	$ADDU	$v0,$t0
 	sltu	$at,$t1,$at
@@ -247,12 +263,12 @@ $code.=<<___;
 	beqz	$a2,.L_bn_mul_add_words_return
 
 	$LD	$t0,2*$BNSZ($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	$LD	$t1,2*$BNSZ($a0)
 	$ADDU	$t1,$v0
 	sltu	$v0,$t1,$v0
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$t1,$at
 	$ADDU	$v0,$t0
 	sltu	$at,$t1,$at
@@ -310,40 +326,40 @@ $code.=<<___;
 
 .L_bn_mul_words_loop:
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	$LD	$t2,$BNSZ($a1)
 	$LD	$ta0,2*$BNSZ($a1)
 	$LD	$ta2,3*$BNSZ($a1)
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$v0,$at
 	sltu	$t1,$v0,$at
-	 $MULTU	$t2,$a3
+	 $MULTU	($t2,$a3)
 	$ST	$v0,0($a0)
 	$ADDU	$v0,$t1,$t0
 
 	subu	$a2,4
 	$PTR_ADD $a0,4*$BNSZ
 	$PTR_ADD $a1,4*$BNSZ
-	mflo	$at
-	mfhi	$t2
+	mflo	($at,$t2,$a3)
+	mfhi	($t2,$t2,$a3)
 	$ADDU	$v0,$at
 	sltu	$t3,$v0,$at
-	 $MULTU	$ta0,$a3
+	 $MULTU	($ta0,$a3)
 	$ST	$v0,-3*$BNSZ($a0)
 	$ADDU	$v0,$t3,$t2
 
-	mflo	$at
-	mfhi	$ta0
+	mflo	($at,$ta0,$a3)
+	mfhi	($ta0,$ta0,$a3)
 	$ADDU	$v0,$at
 	sltu	$ta1,$v0,$at
-	 $MULTU	$ta2,$a3
+	 $MULTU	($ta2,$a3)
 	$ST	$v0,-2*$BNSZ($a0)
 	$ADDU	$v0,$ta1,$ta0
 
 	and	$ta0,$a2,$minus4
-	mflo	$at
-	mfhi	$ta2
+	mflo	($at,$ta2,$a3)
+	mfhi	($ta2,$ta2,$a3)
 	$ADDU	$v0,$at
 	sltu	$ta3,$v0,$at
 	$ST	$v0,-$BNSZ($a0)
@@ -357,10 +373,10 @@ $code.=<<___;
 .L_bn_mul_words_tail:
 	.set	reorder
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	subu	$a2,1
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$v0,$at
 	sltu	$t1,$v0,$at
 	$ST	$v0,0($a0)
@@ -368,10 +384,10 @@ $code.=<<___;
 	beqz	$a2,.L_bn_mul_words_return
 
 	$LD	$t0,$BNSZ($a1)
-	$MULTU	$t0,$a3
+	$MULTU	($t0,$a3)
 	subu	$a2,1
-	mflo	$at
-	mfhi	$t0
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$v0,$at
 	sltu	$t1,$v0,$at
 	$ST	$v0,$BNSZ($a0)
@@ -379,9 +395,9 @@ $code.=<<___;
 	beqz	$a2,.L_bn_mul_words_return
 
 	$LD	$t0,2*$BNSZ($a1)
-	$MULTU	$t0,$a3
-	mflo	$at
-	mfhi	$t0
+	$MULTU	($t0,$a3)
+	mflo	($at,$t0,$a3)
+	mfhi	($t0,$t0,$a3)
 	$ADDU	$v0,$at
 	sltu	$t1,$v0,$at
 	$ST	$v0,2*$BNSZ($a0)
@@ -438,35 +454,35 @@ $code.=<<___;
 
 .L_bn_sqr_words_loop:
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$t0
+	$MULTU	($t0,$t0)
 	$LD	$t2,$BNSZ($a1)
 	$LD	$ta0,2*$BNSZ($a1)
 	$LD	$ta2,3*$BNSZ($a1)
-	mflo	$t1
-	mfhi	$t0
+	mflo	($t1,$t0,$t0)
+	mfhi	($t0,$t0,$t0)
 	$ST	$t1,0($a0)
 	$ST	$t0,$BNSZ($a0)
 
-	$MULTU	$t2,$t2
+	$MULTU	($t2,$t2)
 	subu	$a2,4
 	$PTR_ADD $a0,8*$BNSZ
 	$PTR_ADD $a1,4*$BNSZ
-	mflo	$t3
-	mfhi	$t2
+	mflo	($t3,$t2,$t2)
+	mfhi	($t2,$t2,$t2)
 	$ST	$t3,-6*$BNSZ($a0)
 	$ST	$t2,-5*$BNSZ($a0)
 
-	$MULTU	$ta0,$ta0
-	mflo	$ta1
-	mfhi	$ta0
+	$MULTU	($ta0,$ta0)
+	mflo	($ta1,$ta0,$ta0)
+	mfhi	($ta0,$ta0,$ta0)
 	$ST	$ta1,-4*$BNSZ($a0)
 	$ST	$ta0,-3*$BNSZ($a0)
 
 
-	$MULTU	$ta2,$ta2
+	$MULTU	($ta2,$ta2)
 	and	$ta0,$a2,$minus4
-	mflo	$ta3
-	mfhi	$ta2
+	mflo	($ta3,$ta2,$ta2)
+	mfhi	($ta2,$ta2,$ta2)
 	$ST	$ta3,-2*$BNSZ($a0)
 
 	.set	noreorder
@@ -479,27 +495,27 @@ $code.=<<___;
 .L_bn_sqr_words_tail:
 	.set	reorder
 	$LD	$t0,0($a1)
-	$MULTU	$t0,$t0
+	$MULTU	($t0,$t0)
 	subu	$a2,1
-	mflo	$t1
-	mfhi	$t0
+	mflo	($t1,$t0,$t0)
+	mfhi	($t0,$t0,$t0)
 	$ST	$t1,0($a0)
 	$ST	$t0,$BNSZ($a0)
 	beqz	$a2,.L_bn_sqr_words_return
 
 	$LD	$t0,$BNSZ($a1)
-	$MULTU	$t0,$t0
+	$MULTU	($t0,$t0)
 	subu	$a2,1
-	mflo	$t1
-	mfhi	$t0
+	mflo	($t1,$t0,$t0)
+	mfhi	($t0,$t0,$t0)
 	$ST	$t1,2*$BNSZ($a0)
 	$ST	$t0,3*$BNSZ($a0)
 	beqz	$a2,.L_bn_sqr_words_return
 
 	$LD	$t0,2*$BNSZ($a1)
-	$MULTU	$t0,$t0
-	mflo	$t1
-	mfhi	$t0
+	$MULTU	($t0,$t0)
+	mflo	($t1,$t0,$t0)
+	mfhi	($t0,$t0,$t0)
 	$ST	$t1,4*$BNSZ($a0)
 	$ST	$t0,5*$BNSZ($a0)
 
@@ -587,13 +603,13 @@ $code.=<<___;
 	sltu	$v0,$t2,$ta2
 	$ST	$t2,-2*$BNSZ($a0)
 	$ADDU	$v0,$t8
-	
+
 	$ADDU	$ta3,$t3
 	sltu	$t9,$ta3,$t3
 	$ADDU	$t3,$ta3,$v0
 	sltu	$v0,$t3,$ta3
 	$ST	$t3,-$BNSZ($a0)
-	
+
 	.set	noreorder
 	bgtz	$at,.L_bn_add_words_loop
 	$ADDU	$v0,$t9
@@ -792,7 +808,7 @@ bn_div_3_words:
 				# so that we can save two arguments
 				# and return address in registers
 				# instead of stack:-)
-				
+
 	$LD	$a0,($a3)
 	move	$ta2,$a1
 	bne	$a0,$a2,bn_div_3_words_internal
@@ -823,11 +839,11 @@ $code.=<<___;
 	move	$ta3,$ra
 	bal	bn_div_words_internal
 	move	$ra,$ta3
-	$MULTU	$ta2,$v0
+	$MULTU	($ta2,$v0)
 	$LD	$t2,-2*$BNSZ($a3)
 	move	$ta0,$zero
-	mfhi	$t1
-	mflo	$t0
+	mfhi	($t1,$ta2,$v0)
+	mflo	($t0,$ta2,$v0)
 	sltu	$t8,$t1,$a1
 .L_bn_div_3_words_inner_loop:
 	bnez	$t8,.L_bn_div_3_words_inner_loop_done
@@ -930,15 +946,15 @@ $code.=<<___;
 	$SRL	$HH,$a0,4*$BNSZ	# bits
 	$SRL	$QT,4*$BNSZ	# q=0xffffffff
 	beq	$DH,$HH,.L_bn_div_words_skip_div1
-	$DIVU	$zero,$a0,$DH
-	mflo	$QT
+	$DIVU	($a0,$DH)
+	mfqt	($QT,$a0,$DH)
 .L_bn_div_words_skip_div1:
-	$MULTU	$a2,$QT
+	$MULTU	($a2,$QT)
 	$SLL	$t3,$a0,4*$BNSZ	# bits
 	$SRL	$at,$a1,4*$BNSZ	# bits
 	or	$t3,$at
-	mflo	$t0
-	mfhi	$t1
+	mflo	($t0,$a2,$QT)
+	mfhi	($t1,$a2,$QT)
 .L_bn_div_words_inner_loop1:
 	sltu	$t2,$t3,$t0
 	seq	$t8,$HH,$t1
@@ -963,15 +979,15 @@ $code.=<<___;
 	$SRL	$HH,$a0,4*$BNSZ	# bits
 	$SRL	$QT,4*$BNSZ	# q=0xffffffff
 	beq	$DH,$HH,.L_bn_div_words_skip_div2
-	$DIVU	$zero,$a0,$DH
-	mflo	$QT
+	$DIVU	($a0,$DH)
+	mfqt	($QT,$a0,$DH)
 .L_bn_div_words_skip_div2:
-	$MULTU	$a2,$QT
+	$MULTU	($a2,$QT)
 	$SLL	$t3,$a0,4*$BNSZ	# bits
 	$SRL	$at,$a1,4*$BNSZ	# bits
 	or	$t3,$at
-	mflo	$t0
-	mfhi	$t1
+	mflo	($t0,$a2,$QT)
+	mfhi	($t1,$a2,$QT)
 .L_bn_div_words_inner_loop2:
 	sltu	$t2,$t3,$t0
 	seq	$t8,$HH,$t1
@@ -1070,592 +1086,592 @@ $code.=<<___;
 	$LD	$b_0,0($a2)
 	$LD	$a_1,$BNSZ($a1)
 	$LD	$a_2,2*$BNSZ($a1)
-	$MULTU	$a_0,$b_0		# mul_add_c(a[0],b[0],c1,c2,c3);
+	$MULTU	($a_0,$b_0)		# mul_add_c(a[0],b[0],c1,c2,c3);
 	$LD	$a_3,3*$BNSZ($a1)
 	$LD	$b_1,$BNSZ($a2)
 	$LD	$b_2,2*$BNSZ($a2)
 	$LD	$b_3,3*$BNSZ($a2)
-	mflo	$c_1
-	mfhi	$c_2
+	mflo	($c_1,$a_0,$b_0)
+	mfhi	($c_2,$a_0,$b_0)
 
 	$LD	$a_4,4*$BNSZ($a1)
 	$LD	$a_5,5*$BNSZ($a1)
-	$MULTU	$a_0,$b_1		# mul_add_c(a[0],b[1],c2,c3,c1);
+	$MULTU	($a_0,$b_1)		# mul_add_c(a[0],b[1],c2,c3,c1);
 	$LD	$a_6,6*$BNSZ($a1)
 	$LD	$a_7,7*$BNSZ($a1)
 	$LD	$b_4,4*$BNSZ($a2)
 	$LD	$b_5,5*$BNSZ($a2)
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_1)
+	mfhi	($t_2,$a_0,$b_1)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_1,$b_0		# mul_add_c(a[1],b[0],c2,c3,c1);
+	$MULTU	($a_1,$b_0)		# mul_add_c(a[1],b[0],c2,c3,c1);
 	$ADDU	$c_3,$t_2,$at
 	$LD	$b_6,6*$BNSZ($a2)
 	$LD	$b_7,7*$BNSZ($a2)
 	$ST	$c_1,0($a0)	# r[0]=c1;
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_0)
+	mfhi	($t_2,$a_1,$b_0)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_2,$b_0		# mul_add_c(a[2],b[0],c3,c1,c2);
+	 $MULTU	($a_2,$b_0)		# mul_add_c(a[2],b[0],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
 	$ST	$c_2,$BNSZ($a0)	# r[1]=c2;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_0)
+	mfhi	($t_2,$a_2,$b_0)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_1,$b_1		# mul_add_c(a[1],b[1],c3,c1,c2);
+	$MULTU	($a_1,$b_1)		# mul_add_c(a[1],b[1],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_1)
+	mfhi	($t_2,$a_1,$b_1)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_0,$b_2		# mul_add_c(a[0],b[2],c3,c1,c2);
+	$MULTU	($a_0,$b_2)		# mul_add_c(a[0],b[2],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_2)
+	mfhi	($t_2,$a_0,$b_2)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_0,$b_3		# mul_add_c(a[0],b[3],c1,c2,c3);
+	 $MULTU	($a_0,$b_3)		# mul_add_c(a[0],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,2*$BNSZ($a0)	# r[2]=c3;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_3)
+	mfhi	($t_2,$a_0,$b_3)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_1,$b_2		# mul_add_c(a[1],b[2],c1,c2,c3);
+	$MULTU	($a_1,$b_2)		# mul_add_c(a[1],b[2],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$c_3,$c_2,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_2)
+	mfhi	($t_2,$a_1,$b_2)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_2,$b_1		# mul_add_c(a[2],b[1],c1,c2,c3);
+	$MULTU	($a_2,$b_1)		# mul_add_c(a[2],b[1],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_1)
+	mfhi	($t_2,$a_2,$b_1)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_3,$b_0		# mul_add_c(a[3],b[0],c1,c2,c3);
+	$MULTU	($a_3,$b_0)		# mul_add_c(a[3],b[0],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_0)
+	mfhi	($t_2,$a_3,$b_0)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_4,$b_0		# mul_add_c(a[4],b[0],c2,c3,c1);
+	 $MULTU	($a_4,$b_0)		# mul_add_c(a[4],b[0],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,3*$BNSZ($a0)	# r[3]=c1;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_0)
+	mfhi	($t_2,$a_4,$b_0)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_3,$b_1		# mul_add_c(a[3],b[1],c2,c3,c1);
+	$MULTU	($a_3,$b_1)		# mul_add_c(a[3],b[1],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_1)
+	mfhi	($t_2,$a_3,$b_1)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_2,$b_2		# mul_add_c(a[2],b[2],c2,c3,c1);
+	$MULTU	($a_2,$b_2)		# mul_add_c(a[2],b[2],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_2)
+	mfhi	($t_2,$a_2,$b_2)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_1,$b_3		# mul_add_c(a[1],b[3],c2,c3,c1);
+	$MULTU	($a_1,$b_3)		# mul_add_c(a[1],b[3],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_3)
+	mfhi	($t_2,$a_1,$b_3)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_0,$b_4		# mul_add_c(a[0],b[4],c2,c3,c1);
+	$MULTU	($a_0,$b_4)		# mul_add_c(a[0],b[4],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_4)
+	mfhi	($t_2,$a_0,$b_4)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_0,$b_5		# mul_add_c(a[0],b[5],c3,c1,c2);
+	 $MULTU	($a_0,$b_5)		# mul_add_c(a[0],b[5],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,4*$BNSZ($a0)	# r[4]=c2;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_5)
+	mfhi	($t_2,$a_0,$b_5)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_1,$b_4		# mul_add_c(a[1],b[4],c3,c1,c2);
+	$MULTU	($a_1,$b_4)		# mul_add_c(a[1],b[4],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_4)
+	mfhi	($t_2,$a_1,$b_4)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_2,$b_3		# mul_add_c(a[2],b[3],c3,c1,c2);
+	$MULTU	($a_2,$b_3)		# mul_add_c(a[2],b[3],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_3)
+	mfhi	($t_2,$a_2,$b_3)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_3,$b_2		# mul_add_c(a[3],b[2],c3,c1,c2);
+	$MULTU	($a_3,$b_2)		# mul_add_c(a[3],b[2],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_2)
+	mfhi	($t_2,$a_3,$b_2)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_4,$b_1		# mul_add_c(a[4],b[1],c3,c1,c2);
+	$MULTU	($a_4,$b_1)		# mul_add_c(a[4],b[1],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_1)
+	mfhi	($t_2,$a_4,$b_1)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_5,$b_0		# mul_add_c(a[5],b[0],c3,c1,c2);
+	$MULTU	($a_5,$b_0)		# mul_add_c(a[5],b[0],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_0)
+	mfhi	($t_2,$a_5,$b_0)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_6,$b_0		# mul_add_c(a[6],b[0],c1,c2,c3);
+	 $MULTU	($a_6,$b_0)		# mul_add_c(a[6],b[0],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,5*$BNSZ($a0)	# r[5]=c3;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_0)
+	mfhi	($t_2,$a_6,$b_0)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_5,$b_1		# mul_add_c(a[5],b[1],c1,c2,c3);
+	$MULTU	($a_5,$b_1)		# mul_add_c(a[5],b[1],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$c_3,$c_2,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_1)
+	mfhi	($t_2,$a_5,$b_1)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_4,$b_2		# mul_add_c(a[4],b[2],c1,c2,c3);
+	$MULTU	($a_4,$b_2)		# mul_add_c(a[4],b[2],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_2)
+	mfhi	($t_2,$a_4,$b_2)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_3,$b_3		# mul_add_c(a[3],b[3],c1,c2,c3);
+	$MULTU	($a_3,$b_3)		# mul_add_c(a[3],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_3)
+	mfhi	($t_2,$a_3,$b_3)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_2,$b_4		# mul_add_c(a[2],b[4],c1,c2,c3);
+	$MULTU	($a_2,$b_4)		# mul_add_c(a[2],b[4],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_4)
+	mfhi	($t_2,$a_2,$b_4)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_1,$b_5		# mul_add_c(a[1],b[5],c1,c2,c3);
+	$MULTU	($a_1,$b_5)		# mul_add_c(a[1],b[5],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_5)
+	mfhi	($t_2,$a_1,$b_5)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_0,$b_6		# mul_add_c(a[0],b[6],c1,c2,c3);
+	$MULTU	($a_0,$b_6)		# mul_add_c(a[0],b[6],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_6)
+	mfhi	($t_2,$a_0,$b_6)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_0,$b_7		# mul_add_c(a[0],b[7],c2,c3,c1);
+	 $MULTU	($a_0,$b_7)		# mul_add_c(a[0],b[7],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,6*$BNSZ($a0)	# r[6]=c1;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_7)
+	mfhi	($t_2,$a_0,$b_7)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_1,$b_6		# mul_add_c(a[1],b[6],c2,c3,c1);
+	$MULTU	($a_1,$b_6)		# mul_add_c(a[1],b[6],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_6)
+	mfhi	($t_2,$a_1,$b_6)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_2,$b_5		# mul_add_c(a[2],b[5],c2,c3,c1);
+	$MULTU	($a_2,$b_5)		# mul_add_c(a[2],b[5],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_5)
+	mfhi	($t_2,$a_2,$b_5)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_3,$b_4		# mul_add_c(a[3],b[4],c2,c3,c1);
+	$MULTU	($a_3,$b_4)		# mul_add_c(a[3],b[4],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_4)
+	mfhi	($t_2,$a_3,$b_4)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_4,$b_3		# mul_add_c(a[4],b[3],c2,c3,c1);
+	$MULTU	($a_4,$b_3)		# mul_add_c(a[4],b[3],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_3)
+	mfhi	($t_2,$a_4,$b_3)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_5,$b_2		# mul_add_c(a[5],b[2],c2,c3,c1);
+	$MULTU	($a_5,$b_2)		# mul_add_c(a[5],b[2],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_2)
+	mfhi	($t_2,$a_5,$b_2)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_6,$b_1		# mul_add_c(a[6],b[1],c2,c3,c1);
+	$MULTU	($a_6,$b_1)		# mul_add_c(a[6],b[1],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_1)
+	mfhi	($t_2,$a_6,$b_1)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_7,$b_0		# mul_add_c(a[7],b[0],c2,c3,c1);
+	$MULTU	($a_7,$b_0)		# mul_add_c(a[7],b[0],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_0)
+	mfhi	($t_2,$a_7,$b_0)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_7,$b_1		# mul_add_c(a[7],b[1],c3,c1,c2);
+	 $MULTU	($a_7,$b_1)		# mul_add_c(a[7],b[1],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,7*$BNSZ($a0)	# r[7]=c2;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_1)
+	mfhi	($t_2,$a_7,$b_1)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_6,$b_2		# mul_add_c(a[6],b[2],c3,c1,c2);
+	$MULTU	($a_6,$b_2)		# mul_add_c(a[6],b[2],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_2)
+	mfhi	($t_2,$a_6,$b_2)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_5,$b_3		# mul_add_c(a[5],b[3],c3,c1,c2);
+	$MULTU	($a_5,$b_3)		# mul_add_c(a[5],b[3],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_3)
+	mfhi	($t_2,$a_5,$b_3)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_4,$b_4		# mul_add_c(a[4],b[4],c3,c1,c2);
+	$MULTU	($a_4,$b_4)		# mul_add_c(a[4],b[4],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_4)
+	mfhi	($t_2,$a_4,$b_4)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_3,$b_5		# mul_add_c(a[3],b[5],c3,c1,c2);
+	$MULTU	($a_3,$b_5)		# mul_add_c(a[3],b[5],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_5)
+	mfhi	($t_2,$a_3,$b_5)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_2,$b_6		# mul_add_c(a[2],b[6],c3,c1,c2);
+	$MULTU	($a_2,$b_6)		# mul_add_c(a[2],b[6],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_6)
+	mfhi	($t_2,$a_2,$b_6)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_1,$b_7		# mul_add_c(a[1],b[7],c3,c1,c2);
+	$MULTU	($a_1,$b_7)		# mul_add_c(a[1],b[7],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_7)
+	mfhi	($t_2,$a_1,$b_7)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_2,$b_7		# mul_add_c(a[2],b[7],c1,c2,c3);
+	 $MULTU	($a_2,$b_7)		# mul_add_c(a[2],b[7],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,8*$BNSZ($a0)	# r[8]=c3;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_7)
+	mfhi	($t_2,$a_2,$b_7)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_3,$b_6		# mul_add_c(a[3],b[6],c1,c2,c3);
+	$MULTU	($a_3,$b_6)		# mul_add_c(a[3],b[6],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$c_3,$c_2,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_6)
+	mfhi	($t_2,$a_3,$b_6)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_4,$b_5		# mul_add_c(a[4],b[5],c1,c2,c3);
+	$MULTU	($a_4,$b_5)		# mul_add_c(a[4],b[5],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_5)
+	mfhi	($t_2,$a_4,$b_5)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_5,$b_4		# mul_add_c(a[5],b[4],c1,c2,c3);
+	$MULTU	($a_5,$b_4)		# mul_add_c(a[5],b[4],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_4)
+	mfhi	($t_2,$a_5,$b_4)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_6,$b_3		# mul_add_c(a[6],b[3],c1,c2,c3);
+	$MULTU	($a_6,$b_3)		# mul_add_c(a[6],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_3)
+	mfhi	($t_2,$a_6,$b_3)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_7,$b_2		# mul_add_c(a[7],b[2],c1,c2,c3);
+	$MULTU	($a_7,$b_2)		# mul_add_c(a[7],b[2],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_2)
+	mfhi	($t_2,$a_7,$b_2)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_7,$b_3		# mul_add_c(a[7],b[3],c2,c3,c1);
+	 $MULTU	($a_7,$b_3)		# mul_add_c(a[7],b[3],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,9*$BNSZ($a0)	# r[9]=c1;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_3)
+	mfhi	($t_2,$a_7,$b_3)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_6,$b_4		# mul_add_c(a[6],b[4],c2,c3,c1);
+	$MULTU	($a_6,$b_4)		# mul_add_c(a[6],b[4],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_4)
+	mfhi	($t_2,$a_6,$b_4)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_5,$b_5		# mul_add_c(a[5],b[5],c2,c3,c1);
+	$MULTU	($a_5,$b_5)		# mul_add_c(a[5],b[5],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_5)
+	mfhi	($t_2,$a_5,$b_5)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_4,$b_6		# mul_add_c(a[4],b[6],c2,c3,c1);
+	$MULTU	($a_4,$b_6)		# mul_add_c(a[4],b[6],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_6)
+	mfhi	($t_2,$a_4,$b_6)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_3,$b_7		# mul_add_c(a[3],b[7],c2,c3,c1);
+	$MULTU	($a_3,$b_7)		# mul_add_c(a[3],b[7],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_7)
+	mfhi	($t_2,$a_3,$b_7)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_4,$b_7		# mul_add_c(a[4],b[7],c3,c1,c2);
+	$MULTU	($a_4,$b_7)		# mul_add_c(a[4],b[7],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,10*$BNSZ($a0)	# r[10]=c2;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_4,$b_7)
+	mfhi	($t_2,$a_4,$b_7)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_5,$b_6		# mul_add_c(a[5],b[6],c3,c1,c2);
+	$MULTU	($a_5,$b_6)		# mul_add_c(a[5],b[6],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_6)
+	mfhi	($t_2,$a_5,$b_6)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_6,$b_5		# mul_add_c(a[6],b[5],c3,c1,c2);
+	$MULTU	($a_6,$b_5)		# mul_add_c(a[6],b[5],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_5)
+	mfhi	($t_2,$a_6,$b_5)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_7,$b_4		# mul_add_c(a[7],b[4],c3,c1,c2);
+	$MULTU	($a_7,$b_4)		# mul_add_c(a[7],b[4],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_4)
+	mfhi	($t_2,$a_7,$b_4)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_7,$b_5		# mul_add_c(a[7],b[5],c1,c2,c3);
+	 $MULTU	($a_7,$b_5)		# mul_add_c(a[7],b[5],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,11*$BNSZ($a0)	# r[11]=c3;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_5)
+	mfhi	($t_2,$a_7,$b_5)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_6,$b_6		# mul_add_c(a[6],b[6],c1,c2,c3);
+	$MULTU	($a_6,$b_6)		# mul_add_c(a[6],b[6],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$c_3,$c_2,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_6)
+	mfhi	($t_2,$a_6,$b_6)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_5,$b_7		# mul_add_c(a[5],b[7],c1,c2,c3);
+	$MULTU	($a_5,$b_7)		# mul_add_c(a[5],b[7],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_5,$b_7)
+	mfhi	($t_2,$a_5,$b_7)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_6,$b_7		# mul_add_c(a[6],b[7],c2,c3,c1);
+	 $MULTU	($a_6,$b_7)		# mul_add_c(a[6],b[7],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,12*$BNSZ($a0)	# r[12]=c1;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_6,$b_7)
+	mfhi	($t_2,$a_6,$b_7)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_7,$b_6		# mul_add_c(a[7],b[6],c2,c3,c1);
+	$MULTU	($a_7,$b_6)		# mul_add_c(a[7],b[6],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_6)
+	mfhi	($t_2,$a_7,$b_6)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_7,$b_7		# mul_add_c(a[7],b[7],c3,c1,c2);
+	$MULTU	($a_7,$b_7)		# mul_add_c(a[7],b[7],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,13*$BNSZ($a0)	# r[13]=c2;
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_7,$b_7)
+	mfhi	($t_2,$a_7,$b_7)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
 	$ADDU	$t_2,$at
@@ -1716,144 +1732,144 @@ $code.=<<___;
 	$LD	$b_0,0($a2)
 	$LD	$a_1,$BNSZ($a1)
 	$LD	$a_2,2*$BNSZ($a1)
-	$MULTU	$a_0,$b_0		# mul_add_c(a[0],b[0],c1,c2,c3);
+	$MULTU	($a_0,$b_0)		# mul_add_c(a[0],b[0],c1,c2,c3);
 	$LD	$a_3,3*$BNSZ($a1)
 	$LD	$b_1,$BNSZ($a2)
 	$LD	$b_2,2*$BNSZ($a2)
 	$LD	$b_3,3*$BNSZ($a2)
-	mflo	$c_1
-	mfhi	$c_2
+	mflo	($c_1,$a_0,$b_0)
+	mfhi	($c_2,$a_0,$b_0)
 	$ST	$c_1,0($a0)
 
-	$MULTU	$a_0,$b_1		# mul_add_c(a[0],b[1],c2,c3,c1);
-	mflo	$t_1
-	mfhi	$t_2
+	$MULTU	($a_0,$b_1)		# mul_add_c(a[0],b[1],c2,c3,c1);
+	mflo	($t_1,$a_0,$b_1)
+	mfhi	($t_2,$a_0,$b_1)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_1,$b_0		# mul_add_c(a[1],b[0],c2,c3,c1);
+	$MULTU	($a_1,$b_0)		# mul_add_c(a[1],b[0],c2,c3,c1);
 	$ADDU	$c_3,$t_2,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_0)
+	mfhi	($t_2,$a_1,$b_0)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_2,$b_0		# mul_add_c(a[2],b[0],c3,c1,c2);
+	 $MULTU	($a_2,$b_0)		# mul_add_c(a[2],b[0],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
 	$ST	$c_2,$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_0)
+	mfhi	($t_2,$a_2,$b_0)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_1,$b_1		# mul_add_c(a[1],b[1],c3,c1,c2);
+	$MULTU	($a_1,$b_1)		# mul_add_c(a[1],b[1],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_1)
+	mfhi	($t_2,$a_1,$b_1)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_0,$b_2		# mul_add_c(a[0],b[2],c3,c1,c2);
+	$MULTU	($a_0,$b_2)		# mul_add_c(a[0],b[2],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_2)
+	mfhi	($t_2,$a_0,$b_2)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_0,$b_3		# mul_add_c(a[0],b[3],c1,c2,c3);
+	 $MULTU	($a_0,$b_3)		# mul_add_c(a[0],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,2*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_0,$b_3)
+	mfhi	($t_2,$a_0,$b_3)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_1,$b_2		# mul_add_c(a[1],b[2],c1,c2,c3);
+	$MULTU	($a_1,$b_2)		# mul_add_c(a[1],b[2],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$c_3,$c_2,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_2)
+	mfhi	($t_2,$a_1,$b_2)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_2,$b_1		# mul_add_c(a[2],b[1],c1,c2,c3);
+	$MULTU	($a_2,$b_1)		# mul_add_c(a[2],b[1],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_1)
+	mfhi	($t_2,$a_2,$b_1)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	$MULTU	$a_3,$b_0		# mul_add_c(a[3],b[0],c1,c2,c3);
+	$MULTU	($a_3,$b_0)		# mul_add_c(a[3],b[0],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_0)
+	mfhi	($t_2,$a_3,$b_0)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_3,$b_1		# mul_add_c(a[3],b[1],c2,c3,c1);
+	 $MULTU	($a_3,$b_1)		# mul_add_c(a[3],b[1],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,3*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_1)
+	mfhi	($t_2,$a_3,$b_1)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_2,$b_2		# mul_add_c(a[2],b[2],c2,c3,c1);
+	$MULTU	($a_2,$b_2)		# mul_add_c(a[2],b[2],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$c_1,$c_3,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_2)
+	mfhi	($t_2,$a_2,$b_2)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	$MULTU	$a_1,$b_3		# mul_add_c(a[1],b[3],c2,c3,c1);
+	$MULTU	($a_1,$b_3)		# mul_add_c(a[1],b[3],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_1,$b_3)
+	mfhi	($t_2,$a_1,$b_3)
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_2,$b_3		# mul_add_c(a[2],b[3],c3,c1,c2);
+	 $MULTU	($a_2,$b_3)		# mul_add_c(a[2],b[3],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,4*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_2,$b_3)
+	mfhi	($t_2,$a_2,$b_3)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	$MULTU	$a_3,$b_2		# mul_add_c(a[3],b[2],c3,c1,c2);
+	$MULTU	($a_3,$b_2)		# mul_add_c(a[3],b[2],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$c_2,$c_1,$t_2
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_2)
+	mfhi	($t_2,$a_3,$b_2)
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_3,$b_3		# mul_add_c(a[3],b[3],c1,c2,c3);
+	 $MULTU	($a_3,$b_3)		# mul_add_c(a[3],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,5*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
+	mflo	($t_1,$a_3,$b_3)
+	mfhi	($t_2,$a_3,$b_3)
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
 	$ADDU	$t_2,$at
@@ -1888,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2,
                 # commented as "forward multiplication" below];
     )=@_;
 $code.=<<___;
-	mflo	$lo
-	mfhi	$hi
 	$ADDU	$c0,$lo
 	sltu	$at,$c0,$lo
-	 $MULTU	$an,$bn			# forward multiplication
+	 $MULTU	($an,$bn)		# forward multiplication
 	$ADDU	$c0,$lo
 	$ADDU	$at,$hi
 	sltu	$lo,$c0,$lo
@@ -1902,15 +1916,17 @@ ___
 $code.=<<___	if (!$warm);
 	sltu	$c2,$c1,$at
 	$ADDU	$c1,$hi
-	sltu	$hi,$c1,$hi
-	$ADDU	$c2,$hi
 ___
 $code.=<<___	if ($warm);
 	sltu	$at,$c1,$at
 	$ADDU	$c1,$hi
 	$ADDU	$c2,$at
+___
+$code.=<<___;
 	sltu	$hi,$c1,$hi
 	$ADDU	$c2,$hi
+	mflo	($lo,$an,$bn)
+	mfhi	($hi,$an,$bn)
 ___
 }
 
@@ -1940,21 +1956,21 @@ $code.=<<___;
 	$LD	$a_2,2*$BNSZ($a1)
 	$LD	$a_3,3*$BNSZ($a1)
 
-	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
+	$MULTU	($a_0,$a_0)		# mul_add_c(a[0],b[0],c1,c2,c3);
 	$LD	$a_4,4*$BNSZ($a1)
 	$LD	$a_5,5*$BNSZ($a1)
 	$LD	$a_6,6*$BNSZ($a1)
 	$LD	$a_7,7*$BNSZ($a1)
-	mflo	$c_1
-	mfhi	$c_2
+	mflo	($c_1,$a_0,$a_0)
+	mfhi	($c_2,$a_0,$a_0)
 	$ST	$c_1,0($a0)
 
-	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
-	mflo	$t_1
-	mfhi	$t_2
+	$MULTU	($a_0,$a_1)		# mul_add_c2(a[0],b[1],c2,c3,c1);
+	mflo	($t_1,$a_0,$a_1)
+	mfhi	($t_2,$a_0,$a_1)
 	slt	$c_1,$t_2,$zero
 	$SLL	$t_2,1
-	 $MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
+	 $MULTU	($a_2,$a_0)		# mul_add_c2(a[2],b[0],c3,c1,c2);
 	slt	$a2,$t_1,$zero
 	$ADDU	$t_2,$a2
 	$SLL	$t_1,1
@@ -1962,20 +1978,22 @@ $code.=<<___;
 	sltu	$at,$c_2,$t_1
 	$ADDU	$c_3,$t_2,$at
 	$ST	$c_2,$BNSZ($a0)
+	mflo	($t_1,$a_2,$a_0)
+	mfhi	($t_2,$a_2,$a_0)
 ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
 		$a_1,$a_1);		# mul_add_c(a[1],b[1],c3,c1,c2);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
+	 $MULTU	($a_0,$a_3)		# mul_add_c2(a[0],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,2*$BNSZ($a0)
+	mflo	($t_1,$a_0,$a_3)
+	mfhi	($t_2,$a_0,$a_3)
 ___
 	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
 		$a_1,$a_2);		# mul_add_c2(a[1],b[2],c1,c2,c3);
@@ -1989,16 +2007,16 @@ ___
 	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
 		$a_2,$a_2);		# mul_add_c(a[2],b[2],c2,c3,c1);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_0,$a_5		# mul_add_c2(a[0],b[5],c3,c1,c2);
+	 $MULTU	($a_0,$a_5)		# mul_add_c2(a[0],b[5],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,4*$BNSZ($a0)
+	mflo	($t_1,$a_0,$a_5)
+	mfhi	($t_2,$a_0,$a_5)
 ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
 		$a_1,$a_4);		# mul_add_c2(a[1],b[4],c3,c1,c2);
@@ -2016,16 +2034,16 @@ ___
 	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
 		$a_3,$a_3);		# mul_add_c(a[3],b[3],c1,c2,c3);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_0,$a_7		# mul_add_c2(a[0],b[7],c2,c3,c1);
+	 $MULTU	($a_0,$a_7)		# mul_add_c2(a[0],b[7],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,6*$BNSZ($a0)
+	mflo	($t_1,$a_0,$a_7)
+	mfhi	($t_2,$a_0,$a_7)
 ___
 	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
 		$a_1,$a_6);		# mul_add_c2(a[1],b[6],c2,c3,c1);
@@ -2045,16 +2063,16 @@ ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
 		$a_4,$a_4);		# mul_add_c(a[4],b[4],c3,c1,c2);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_2,$a_7		# mul_add_c2(a[2],b[7],c1,c2,c3);
+	 $MULTU	($a_2,$a_7)		# mul_add_c2(a[2],b[7],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,8*$BNSZ($a0)
+	mflo	($t_1,$a_2,$a_7)
+	mfhi	($t_2,$a_2,$a_7)
 ___
 	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
 		$a_3,$a_6);		# mul_add_c2(a[3],b[6],c1,c2,c3);
@@ -2070,16 +2088,16 @@ ___
 	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
 		$a_5,$a_5);		# mul_add_c(a[5],b[5],c2,c3,c1);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_4,$a_7		# mul_add_c2(a[4],b[7],c3,c1,c2);
+	 $MULTU	($a_4,$a_7)		# mul_add_c2(a[4],b[7],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,10*$BNSZ($a0)
+	mflo	($t_1,$a_4,$a_7)
+	mfhi	($t_2,$a_4,$a_7)
 ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
 		$a_5,$a_6);		# mul_add_c2(a[5],b[6],c3,c1,c2);
@@ -2091,24 +2109,22 @@ ___
 	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
 		$a_6,$a_6);		# mul_add_c(a[6],b[6],c1,c2,c3);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
-	 $MULTU	$a_6,$a_7		# mul_add_c2(a[6],b[7],c2,c3,c1);
+	 $MULTU	($a_6,$a_7)		# mul_add_c2(a[6],b[7],c2,c3,c1);
 	$ADDU	$t_2,$at
 	$ADDU	$c_2,$t_2
 	sltu	$at,$c_2,$t_2
 	$ADDU	$c_3,$at
 	$ST	$c_1,12*$BNSZ($a0)
+	mflo	($t_1,$a_6,$a_7)
+	mfhi	($t_2,$a_6,$a_7)
 ___
 	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
 		$a_7,$a_7);		# mul_add_c(a[7],b[7],c3,c1,c2);
 $code.=<<___;
 	$ST	$c_2,13*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
 	$ADDU	$t_2,$at
@@ -2152,19 +2168,19 @@ $code.=<<___;
 	.set	reorder
 	$LD	$a_0,0($a1)
 	$LD	$a_1,$BNSZ($a1)
-	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
+	$MULTU	($a_0,$a_0)		# mul_add_c(a[0],b[0],c1,c2,c3);
 	$LD	$a_2,2*$BNSZ($a1)
 	$LD	$a_3,3*$BNSZ($a1)
-	mflo	$c_1
-	mfhi	$c_2
+	mflo	($c_1,$a_0,$a_0)
+	mfhi	($c_2,$a_0,$a_0)
 	$ST	$c_1,0($a0)
 
-	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
-	mflo	$t_1
-	mfhi	$t_2
+	$MULTU	($a_0,$a_1)		# mul_add_c2(a[0],b[1],c2,c3,c1);
+	mflo	($t_1,$a_0,$a_1)
+	mfhi	($t_2,$a_0,$a_1)
 	slt	$c_1,$t_2,$zero
 	$SLL	$t_2,1
-	 $MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
+	 $MULTU	($a_2,$a_0)		# mul_add_c2(a[2],b[0],c3,c1,c2);
 	slt	$a2,$t_1,$zero
 	$ADDU	$t_2,$a2
 	$SLL	$t_1,1
@@ -2172,20 +2188,22 @@ $code.=<<___;
 	sltu	$at,$c_2,$t_1
 	$ADDU	$c_3,$t_2,$at
 	$ST	$c_2,$BNSZ($a0)
+	mflo	($t_1,$a_2,$a_0)
+	mfhi	($t_2,$a_2,$a_0)
 ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
 		$a_1,$a_1);		# mul_add_c(a[1],b[1],c3,c1,c2);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_3,$t_1
 	sltu	$at,$c_3,$t_1
-	 $MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
+	 $MULTU	($a_0,$a_3)		# mul_add_c2(a[0],b[3],c1,c2,c3);
 	$ADDU	$t_2,$at
 	$ADDU	$c_1,$t_2
 	sltu	$at,$c_1,$t_2
 	$ADDU	$c_2,$at
 	$ST	$c_3,2*$BNSZ($a0)
+	mflo	($t_1,$a_0,$a_3)
+	mfhi	($t_2,$a_0,$a_3)
 ___
 	&add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
 		$a_1,$a_2);		# mul_add_c2(a2[1],b[2],c1,c2,c3);
@@ -2197,24 +2215,22 @@ ___
 	&add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
 		$a_2,$a_2);		# mul_add_c(a[2],b[2],c2,c3,c1);
 $code.=<<___;
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_2,$t_1
 	sltu	$at,$c_2,$t_1
-	 $MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
+	 $MULTU	($a_2,$a_3)		# mul_add_c2(a[2],b[3],c3,c1,c2);
 	$ADDU	$t_2,$at
 	$ADDU	$c_3,$t_2
 	sltu	$at,$c_3,$t_2
 	$ADDU	$c_1,$at
 	$ST	$c_2,4*$BNSZ($a0)
+	mflo	($t_1,$a_2,$a_3)
+	mfhi	($t_2,$a_2,$a_3)
 ___
 	&add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
 		$a_3,$a_3);		# mul_add_c(a[3],b[3],c1,c2,c3);
 $code.=<<___;
 	$ST	$c_3,5*$BNSZ($a0)
 
-	mflo	$t_1
-	mfhi	$t_2
 	$ADDU	$c_1,$t_1
 	sltu	$at,$c_1,$t_1
 	$ADDU	$t_2,$at
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
deleted file mode 100644
index 413eac7123..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2.s
+++ /dev/null
@@ -1,1624 +0,0 @@
-; Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
-;
-; Licensed under the OpenSSL license (the "License").  You may not use
-; this file except in compliance with the License.  You can obtain a copy
-; in the file LICENSE in the source distribution or at
-; https://www.openssl.org/source/license.html
-;
-; PA-RISC 2.0 implementation of bn_asm code, based on the
-; 64-bit version of the code.  This code is effectively the
-; same as the 64-bit version except the register model is
-; slightly different given all values must be 32-bit between
-; function calls.  Thus the 64-bit return values are returned
-; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
-;
-;
-; This code is approximately 2x faster than the C version
-; for RSA/DSA.
-;
-; See http://devresource.hp.com/  for more details on the PA-RISC
-; architecture.  Also see the book "PA-RISC 2.0 Architecture"
-; by Gerry Kane for information on the instruction set architecture.
-;
-; Code written by Chris Ruemmler (with some help from the HP C
-; compiler).
-;
-; The code compiles with HP's assembler
-;
-
-	.level	2.0N
-	.space	$TEXT$
-	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
-
-;
-; Global Register definitions used for the routines.
-;
-; Some information about HP's runtime architecture for 32-bits.
-;
-; "Caller save" means the calling function must save the register
-; if it wants the register to be preserved.
-; "Callee save" means if a function uses the register, it must save
-; the value before using it.
-;
-; For the floating point registers 
-;
-;    "caller save" registers: fr4-fr11, fr22-fr31
-;    "callee save" registers: fr12-fr21
-;    "special" registers: fr0-fr3 (status and exception registers)
-;
-; For the integer registers
-;     value zero             :  r0
-;     "caller save" registers: r1,r19-r26
-;     "callee save" registers: r3-r18
-;     return register        :  r2  (rp)
-;     return values          ; r28,r29  (ret0,ret1)
-;     Stack pointer          ; r30  (sp) 
-;     millicode return ptr   ; r31  (also a caller save register)
-
-
-;
-; Arguments to the routines
-;
-r_ptr       .reg %r26
-a_ptr       .reg %r25
-b_ptr       .reg %r24
-num         .reg %r24
-n           .reg %r23
-
-;
-; Note that the "w" argument for bn_mul_add_words and bn_mul_words
-; is passed on the stack at a delta of -56 from the top of stack
-; as the routine is entered.
-;
-
-;
-; Globals used in some routines
-;
-
-top_overflow .reg %r23
-high_mask    .reg %r22    ; value 0xffffffff80000000L
-
-
-;------------------------------------------------------------------------------
-;
-; bn_mul_add_words
-;
-;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, 
-;								int num, BN_ULONG w)
-;
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg3 = num
-; -56(sp) =  w
-;
-; Local register definitions
-;
-
-fm1          .reg %fr22
-fm           .reg %fr23
-ht_temp      .reg %fr24
-ht_temp_1    .reg %fr25
-lt_temp      .reg %fr26
-lt_temp_1    .reg %fr27
-fm1_1        .reg %fr28
-fm_1         .reg %fr29
-
-fw_h         .reg %fr7L
-fw_l         .reg %fr7R
-fw           .reg %fr7
-
-fht_0        .reg %fr8L
-flt_0        .reg %fr8R
-t_float_0    .reg %fr8
-
-fht_1        .reg %fr9L
-flt_1        .reg %fr9R
-t_float_1    .reg %fr9
-
-tmp_0        .reg %r31
-tmp_1        .reg %r21
-m_0          .reg %r20 
-m_1          .reg %r19 
-ht_0         .reg %r1  
-ht_1         .reg %r3
-lt_0         .reg %r4
-lt_1         .reg %r5
-m1_0         .reg %r6 
-m1_1         .reg %r7 
-rp_val       .reg %r8
-rp_val_1     .reg %r9
-
-bn_mul_add_words
-	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
-	.proc
-	.callinfo frame=128
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-	NOP                         ; Needed to make the loop 16-byte aligned
-	NOP                         ; needed to make the loop 16-byte aligned
-
-    STD     %r5,16(%sp)         ; save r5  
-	NOP
-    STD     %r6,24(%sp)         ; save r6  
-    STD     %r7,32(%sp)         ; save r7  
-
-    STD     %r8,40(%sp)         ; save r8  
-    STD     %r9,48(%sp)         ; save r9  
-    COPY    %r0,%ret1           ; return 0 by default
-    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32    
-
-    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit
-	LDO     128(%sp),%sp        ; bump stack
-
-	;
-	; The loop is unrolled twice, so if there is only 1 number
-    ; then go straight to the cleanup code.
-	;
-	CMPIB,= 1,num,bn_mul_add_words_single_top
-	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l)
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-    ; two 32-bit mutiplies can be issued per cycle.
-    ; 
-bn_mul_add_words_unroll2
-
-    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    LDD     0(r_ptr),rp_val          ; rp[0]
-    LDD     8(r_ptr),rp_val_1        ; rp[1]
-
-    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l
-    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l
-    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0]
-    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1]
-
-    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h
-    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h
-    FSTD    fm,-8(%sp)               ; -8(sp) = m[0]
-    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1]
-
-    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h
-    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h
-    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp
-    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1
-
-    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp = lt*fw_l
-    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp 
-    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1 
-
-    LDD     -8(%sp),m_0              ; m[0] 
-    LDD     -40(%sp),m_1             ; m[1]
-    LDD     -16(%sp),m1_0            ; m1[0]
-    LDD     -48(%sp),m1_1            ; m1[1]
-
-    LDD     -24(%sp),ht_0            ; ht[0]
-    LDD     -56(%sp),ht_1            ; ht[1]
-    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0]; 
-    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1]; 
-
-    LDD     -32(%sp),lt_0            
-    LDD     -64(%sp),lt_1            
-    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (m[0] < m1[0])
-    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32)
-
-    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (m[1] < m1[1])
-    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32)
-    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32  
-    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32 
-
-    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32  
-    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32 
-    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32)
-    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32)
-
-    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0];
-	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1];
-    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++
-
-    ADD    %ret1,lt_0,lt_0           ; lt[0] = lt[0] + c;
-	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0]
-    ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-
-	LDO    -2(num),num               ; num = num - 2;
-    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c);
-    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++
-    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0]
-
-    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1]
-    ADD,DC  ht_1,%r0,%ret1           ; ht[1]++
-    LDO     16(a_ptr),a_ptr          ; a_ptr += 2
-
-    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1]
-	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
-    LDO     16(r_ptr),r_ptr          ; r_ptr += 2
-
-    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_mul_add_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    LDD     0(r_ptr),rp_val           ; rp[0]
-    LDO     8(a_ptr),a_ptr            ; a_ptr++
-    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-
-    LDD     -8(%sp),m_0               
-    LDD    -16(%sp),m1_0              ; m1 = temp1 
-    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1; 
-    LDD     -24(%sp),ht_0             
-    LDD     -32(%sp),lt_0             
-
-    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-    ADD     %ret1,tmp_0,lt_0          ; lt = lt + c;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0]
-    ADD,DC  ht_0,%r0,%ret1            ; ht++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-
-bn_mul_add_words_exit
-    .EXIT
-	
-    EXTRD,U %ret1,31,32,%ret0         ; for 32-bit, return in ret0/ret1
-    LDD     -80(%sp),%r9              ; restore r9  
-    LDD     -88(%sp),%r8              ; restore r8  
-    LDD     -96(%sp),%r7              ; restore r7  
-    LDD     -104(%sp),%r6             ; restore r6  
-    LDD     -112(%sp),%r5             ; restore r5  
-    LDD     -120(%sp),%r4             ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3             ; restore r3
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
-;
-; arg0 = rp
-; arg1 = ap
-; arg3 = num
-; w on stack at -56(sp)
-
-bn_mul_words
-	.proc
-	.callinfo frame=128
-    .entry
-	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-	NOP
-    STD     %r5,16(%sp)         ; save r5  
-
-    STD     %r6,24(%sp)         ; save r6  
-    STD     %r7,32(%sp)         ; save r7  
-    COPY    %r0,%ret1           ; return 0 by default
-    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32    
-
-    CMPIB,>= 0,num,bn_mul_words_exit
-	LDO     128(%sp),%sp    ; bump stack
-
-	;
-	; See if only 1 word to do, thus just do cleanup
-	;
-	CMPIB,= 1,num,bn_mul_words_single_top
-	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l)
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-    ; two 32-bit mutiplies can be issued per cycle.
-    ; 
-bn_mul_words_unroll2
-
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l
-    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = ht*fw_l
-
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    XMPYU   flt_1,fw_h,fm_1           ; m = lt*fw_h
-
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    FSTD    fm_1,-40(%sp)             ; -40(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = fht_0*fw_h
-    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp = ht*fw_h
-
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp = lt*fw_l
-
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt 
-    LDD     -8(%sp),m_0               
-    LDD     -40(%sp),m_1              
-
-    LDD    -16(%sp),m1_0              
-    LDD    -48(%sp),m1_1              
-    LDD     -24(%sp),ht_0             
-    LDD     -56(%sp),ht_1             
-
-    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m + m1; 
-    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m + m1; 
-    LDD     -32(%sp),lt_0             
-    LDD     -64(%sp),lt_1             
-
-    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (m < m1)
-    ADD,L   ht_1,top_overflow,ht_1    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-    EXTRD,U tmp_1,31,32,m_1           ; m>>32  
-    DEPD,Z  tmp_1,31,32,m1_1          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD,L   ht_1,m_1,ht_1             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,lt_0            ; lt = lt+m1;
-	ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     lt_1,m1_1,lt_1            ; lt = lt+m1;
-    ADD,DC  ht_1,%r0,ht_1             ; ht++
-    ADD    %ret1,lt_0,lt_0            ; lt = lt + c (ret1);
-	ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     ht_0,lt_1,lt_1            ; lt = lt + c (ht_0)
-    ADD,DC  ht_1,%r0,ht_1             ; ht++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-    STD     lt_1,8(r_ptr)             ; rp[1] = lt
-
-	COPY    ht_1,%ret1                ; carry = ht
-	LDO    -2(num),num                ; num = num - 2;
-    LDO     16(a_ptr),a_ptr           ; ap += 2
-	CMPIB,<= 2,num,bn_mul_words_unroll2
-    LDO     16(r_ptr),r_ptr           ; rp++
-
-    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_mul_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
-    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-
-    LDD     -8(%sp),m_0               
-    LDD    -16(%sp),m1_0              
-    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1; 
-    LDD     -24(%sp),ht_0             
-    LDD     -32(%sp),lt_0             
-
-    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     %ret1,lt_0,lt_0           ; lt = lt + c;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    COPY    ht_0,%ret1                ; copy carry
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-
-bn_mul_words_exit
-    .EXIT
-    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
-    LDD     -96(%sp),%r7              ; restore r7  
-    LDD     -104(%sp),%r6             ; restore r6  
-    LDD     -112(%sp),%r5             ; restore r5  
-    LDD     -120(%sp),%r4             ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3             ; restore r3
-	.PROCEND	
-
-;----------------------------------------------------------------------------
-;
-;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-;
-
-bn_sqr_words
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-	NOP
-    STD     %r5,16(%sp)         ; save r5  
-
-    CMPIB,>= 0,num,bn_sqr_words_exit
-	LDO     128(%sp),%sp       ; bump stack
-
-	;
-	; If only 1, the goto straight to cleanup
-	;
-	CMPIB,= 1,num,bn_sqr_words_single_top
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-
-bn_sqr_words_unroll2
-    FLDD    0(a_ptr),t_float_0        ; a[0]
-    FLDD    8(a_ptr),t_float_1        ; a[1]
-    XMPYU   fht_0,flt_0,fm            ; m[0]
-    XMPYU   fht_1,flt_1,fm_1          ; m[1]
-
-    FSTD    fm,-24(%sp)               ; store m[0]
-    FSTD    fm_1,-56(%sp)             ; store m[1]
-    XMPYU   flt_0,flt_0,lt_temp       ; lt[0]
-    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1]
-
-    FSTD    lt_temp,-16(%sp)          ; store lt[0]
-    FSTD    lt_temp_1,-48(%sp)        ; store lt[1]
-    XMPYU   fht_0,fht_0,ht_temp       ; ht[0]
-    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1]
-
-    FSTD    ht_temp,-8(%sp)           ; store ht[0]
-    FSTD    ht_temp_1,-40(%sp)        ; store ht[1]
-    LDD     -24(%sp),m_0             
-    LDD     -56(%sp),m_1              
-
-    AND     m_0,high_mask,tmp_0       ; m[0] & Mask
-    AND     m_1,high_mask,tmp_1       ; m[1] & Mask
-    DEPD,Z  m_0,30,31,m_0             ; m[0] << 32+1
-    DEPD,Z  m_1,30,31,m_1             ; m[1] << 32+1
-
-    LDD     -16(%sp),lt_0        
-    LDD     -48(%sp),lt_1        
-    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1
-    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1
-
-    LDD     -8(%sp),ht_0            
-    LDD     -40(%sp),ht_1           
-    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0
-    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1
-
-    ADD     lt_0,m_0,lt_0             ; lt = lt+m
-    ADD,DC  ht_0,%r0,ht_0             ; ht[0]++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0]
-    STD     ht_0,8(r_ptr)             ; rp[1] = ht[1]
-
-    ADD     lt_1,m_1,lt_1             ; lt = lt+m
-    ADD,DC  ht_1,%r0,ht_1             ; ht[1]++
-    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1]
-    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1]
-
-	LDO    -2(num),num                ; num = num - 2;
-    LDO     16(a_ptr),a_ptr           ; ap += 2
-	CMPIB,<= 2,num,bn_sqr_words_unroll2
-    LDO     32(r_ptr),r_ptr           ; rp += 4
-
-    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_sqr_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
-    XMPYU   fht_0,flt_0,fm            ; m
-    FSTD    fm,-24(%sp)               ; store m
-
-    XMPYU   flt_0,flt_0,lt_temp       ; lt
-    FSTD    lt_temp,-16(%sp)          ; store lt
-
-    XMPYU   fht_0,fht_0,ht_temp       ; ht
-    FSTD    ht_temp,-8(%sp)           ; store ht
-
-    LDD     -24(%sp),m_0              ; load m
-    AND     m_0,high_mask,tmp_0       ; m & Mask
-    DEPD,Z  m_0,30,31,m_0             ; m << 32+1
-    LDD     -16(%sp),lt_0             ; lt
-
-    LDD     -8(%sp),ht_0              ; ht
-    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1
-    ADD     m_0,lt_0,lt_0             ; lt = lt+m
-    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-    STD     ht_0,8(r_ptr)             ; rp[1] = ht
-
-bn_sqr_words_exit
-    .EXIT
-    LDD     -112(%sp),%r5       ; restore r5  
-    LDD     -120(%sp),%r4       ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3 
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp 
-; arg1 = ap
-; arg2 = bp 
-; arg3 = n
-
-t  .reg %r22
-b  .reg %r21
-l  .reg %r20
-
-bn_add_words
-	.proc
-    .entry
-	.callinfo
-	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-	.align 64
-
-    CMPIB,>= 0,n,bn_add_words_exit
-    COPY    %r0,%ret1           ; return 0 by default
-
-	;
-	; If 2 or more numbers do the loop
-	;
-	CMPIB,= 1,n,bn_add_words_single_top
-	NOP
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-bn_add_words_unroll2
-	LDD     0(a_ptr),t
-	LDD     0(b_ptr),b
-	ADD     t,%ret1,t                    ; t = t+c;
-	ADD,DC  %r0,%r0,%ret1                ; set c to carry
-	ADD     t,b,l                        ; l = t + b[0]
-	ADD,DC  %ret1,%r0,%ret1              ; c+= carry
-	STD     l,0(r_ptr)
-
-	LDD     8(a_ptr),t
-	LDD     8(b_ptr),b
-	ADD     t,%ret1,t                     ; t = t+c;
-	ADD,DC  %r0,%r0,%ret1                 ; set c to carry
-	ADD     t,b,l                         ; l = t + b[0]
-	ADD,DC  %ret1,%r0,%ret1               ; c+= carry
-	STD     l,8(r_ptr)
-
-	LDO     -2(n),n
-	LDO     16(a_ptr),a_ptr
-	LDO     16(b_ptr),b_ptr
-
-	CMPIB,<= 2,n,bn_add_words_unroll2
-	LDO     16(r_ptr),r_ptr
-
-    CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
-
-bn_add_words_single_top
-	LDD     0(a_ptr),t
-	LDD     0(b_ptr),b
-
-	ADD     t,%ret1,t                 ; t = t+c;
-	ADD,DC  %r0,%r0,%ret1             ; set c to carry (could use CMPCLR??)
-	ADD     t,b,l                     ; l = t + b[0]
-	ADD,DC  %ret1,%r0,%ret1           ; c+= carry
-	STD     l,0(r_ptr)
-
-bn_add_words_exit
-    .EXIT
-    BVE     (%rp)
-    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp 
-; arg1 = ap
-; arg2 = bp 
-; arg3 = n
-
-t1       .reg %r22
-t2       .reg %r21
-sub_tmp1 .reg %r20
-sub_tmp2 .reg %r19
-
-
-bn_sub_words
-	.proc
-	.callinfo 
-	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    CMPIB,>=  0,n,bn_sub_words_exit
-    COPY    %r0,%ret1           ; return 0 by default
-
-	;
-	; If 2 or more numbers do the loop
-	;
-	CMPIB,= 1,n,bn_sub_words_single_top
-	NOP
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-bn_sub_words_unroll2
-	LDD     0(a_ptr),t1
-	LDD     0(b_ptr),t2
-	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret1,sub_tmp1  ; t3 = t3- c; 
-
-	CMPCLR,*>> t1,t2,sub_tmp2        ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret1
-	STD     sub_tmp1,0(r_ptr)
-
-	LDD     8(a_ptr),t1
-	LDD     8(b_ptr),t2
-	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c; 
-	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret1
-	STD     sub_tmp1,8(r_ptr)
-
-	LDO     -2(n),n
-	LDO     16(a_ptr),a_ptr
-	LDO     16(b_ptr),b_ptr
-
-	CMPIB,<= 2,n,bn_sub_words_unroll2
-	LDO     16(r_ptr),r_ptr
-
-    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
-
-bn_sub_words_single_top
-	LDD     0(a_ptr),t1
-	LDD     0(b_ptr),t2
-	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c; 
-	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret1
-
-	STD     sub_tmp1,0(r_ptr)
-
-bn_sub_words_exit
-    .EXIT
-    BVE     (%rp)
-    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;------------------------------------------------------------------------------
-;
-; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
-;
-; arg0 = h
-; arg1 = l
-; arg2 = d
-;
-; This is mainly just output from the HP C compiler.  
-;
-;------------------------------------------------------------------------------
-bn_div_words
-	.PROC
-	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
-	.IMPORT	BN_num_bits_word,CODE
-	;--- not PIC	.IMPORT	__iob,DATA
-	;--- not PIC	.IMPORT	fprintf,CODE
-	.IMPORT	abort,CODE
-	.IMPORT	$$div2U,MILLICODE
-	.CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
-        .ENTRY
-        STW     %r2,-20(%r30)   ;offset 0x8ec
-        STW,MA  %r3,192(%r30)   ;offset 0x8f0
-        STW     %r4,-188(%r30)  ;offset 0x8f4
-        DEPD    %r5,31,32,%r6   ;offset 0x8f8
-        STD     %r6,-184(%r30)  ;offset 0x8fc
-        DEPD    %r7,31,32,%r8   ;offset 0x900
-        STD     %r8,-176(%r30)  ;offset 0x904
-        STW     %r9,-168(%r30)  ;offset 0x908
-        LDD     -248(%r30),%r3  ;offset 0x90c
-        COPY    %r26,%r4        ;offset 0x910
-        COPY    %r24,%r5        ;offset 0x914
-        DEPD    %r25,31,32,%r4  ;offset 0x918
-        CMPB,*<>        %r3,%r0,$0006000C       ;offset 0x91c
-        DEPD    %r23,31,32,%r5  ;offset 0x920
-        MOVIB,TR        -1,%r29,$00060002       ;offset 0x924
-        EXTRD,U %r29,31,32,%r28 ;offset 0x928
-$0006002A
-        LDO     -1(%r29),%r29   ;offset 0x92c
-        SUB     %r23,%r7,%r23   ;offset 0x930
-$00060024
-        SUB     %r4,%r31,%r25   ;offset 0x934
-        AND     %r25,%r19,%r26  ;offset 0x938
-        CMPB,*<>,N      %r0,%r26,$00060046      ;offset 0x93c
-        DEPD,Z  %r25,31,32,%r20 ;offset 0x940
-        OR      %r20,%r24,%r21  ;offset 0x944
-        CMPB,*<<,N      %r21,%r23,$0006002A     ;offset 0x948
-        SUB     %r31,%r2,%r31   ;offset 0x94c
-$00060046
-$0006002E
-        DEPD,Z  %r23,31,32,%r25 ;offset 0x950
-        EXTRD,U %r23,31,32,%r26 ;offset 0x954
-        AND     %r25,%r19,%r24  ;offset 0x958
-        ADD,L   %r31,%r26,%r31  ;offset 0x95c
-        CMPCLR,*>>=     %r5,%r24,%r0    ;offset 0x960
-        LDO     1(%r31),%r31    ;offset 0x964
-$00060032
-        CMPB,*<<=,N     %r31,%r4,$00060036      ;offset 0x968
-        LDO     -1(%r29),%r29   ;offset 0x96c
-        ADD,L   %r4,%r3,%r4     ;offset 0x970
-$00060036
-        ADDIB,=,N       -1,%r8,$D0      ;offset 0x974
-        SUB     %r5,%r24,%r28   ;offset 0x978
-$0006003A
-        SUB     %r4,%r31,%r24   ;offset 0x97c
-        SHRPD   %r24,%r28,32,%r4        ;offset 0x980
-        DEPD,Z  %r29,31,32,%r9  ;offset 0x984
-        DEPD,Z  %r28,31,32,%r5  ;offset 0x988
-$0006001C
-        EXTRD,U %r4,31,32,%r31  ;offset 0x98c
-        CMPB,*<>,N      %r31,%r2,$00060020      ;offset 0x990
-        MOVB,TR %r6,%r29,$D1    ;offset 0x994
-        STD     %r29,-152(%r30) ;offset 0x998
-$0006000C
-        EXTRD,U %r3,31,32,%r25  ;offset 0x99c
-        COPY    %r3,%r26        ;offset 0x9a0
-        EXTRD,U %r3,31,32,%r9   ;offset 0x9a4
-        EXTRD,U %r4,31,32,%r8   ;offset 0x9a8
-        .CALL   ARGW0=GR,ARGW1=GR,RTNVAL=GR     ;in=25,26;out=28;
-        B,L     BN_num_bits_word,%r2    ;offset 0x9ac
-        EXTRD,U %r5,31,32,%r7   ;offset 0x9b0
-        LDI     64,%r20 ;offset 0x9b4
-        DEPD    %r7,31,32,%r5   ;offset 0x9b8
-        DEPD    %r8,31,32,%r4   ;offset 0x9bc
-        DEPD    %r9,31,32,%r3   ;offset 0x9c0
-        CMPB,=  %r28,%r20,$00060012     ;offset 0x9c4
-        COPY    %r28,%r24       ;offset 0x9c8
-        MTSARCM %r24    ;offset 0x9cc
-        DEPDI,Z -1,%sar,1,%r19  ;offset 0x9d0
-        CMPB,*>>,N      %r4,%r19,$D2    ;offset 0x9d4
-$00060012
-        SUBI    64,%r24,%r31    ;offset 0x9d8
-        CMPCLR,*<<      %r4,%r3,%r0     ;offset 0x9dc
-        SUB     %r4,%r3,%r4     ;offset 0x9e0
-$00060016
-        CMPB,=  %r31,%r0,$0006001A      ;offset 0x9e4
-        COPY    %r0,%r9 ;offset 0x9e8
-        MTSARCM %r31    ;offset 0x9ec
-        DEPD,Z  %r3,%sar,64,%r3 ;offset 0x9f0
-        SUBI    64,%r31,%r26    ;offset 0x9f4
-        MTSAR   %r26    ;offset 0x9f8
-        SHRPD   %r4,%r5,%sar,%r4        ;offset 0x9fc
-        MTSARCM %r31    ;offset 0xa00
-        DEPD,Z  %r5,%sar,64,%r5 ;offset 0xa04
-$0006001A
-        DEPDI,Z -1,31,32,%r19   ;offset 0xa08
-        AND     %r3,%r19,%r29   ;offset 0xa0c
-        EXTRD,U %r29,31,32,%r2  ;offset 0xa10
-        DEPDI,Z -1,63,32,%r6    ;offset 0xa14
-        MOVIB,TR        2,%r8,$0006001C ;offset 0xa18
-        EXTRD,U %r3,63,32,%r7   ;offset 0xa1c
-$D2
-        ;--- not PIC	ADDIL   LR'__iob-$global$,%r27,%r1      ;offset 0xa20
-        ;--- not PIC	LDIL    LR'C$7,%r21     ;offset 0xa24
-        ;--- not PIC	LDO     RR'__iob-$global$+32(%r1),%r26  ;offset 0xa28
-        ;--- not PIC	.CALL   ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR    ;in=24,25,26;out=28;
-        ;--- not PIC	B,L     fprintf,%r2     ;offset 0xa2c
-        ;--- not PIC	LDO     RR'C$7(%r21),%r25       ;offset 0xa30
-        .CALL           ;
-        B,L     abort,%r2       ;offset 0xa34
-        NOP             ;offset 0xa38
-        B       $D3     ;offset 0xa3c
-        LDW     -212(%r30),%r2  ;offset 0xa40
-$00060020
-        COPY    %r4,%r26        ;offset 0xa44
-        EXTRD,U %r4,31,32,%r25  ;offset 0xa48
-        COPY    %r2,%r24        ;offset 0xa4c
-        .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
-        B,L     $$div2U,%r31    ;offset 0xa50
-        EXTRD,U %r2,31,32,%r23  ;offset 0xa54
-        DEPD    %r28,31,32,%r29 ;offset 0xa58
-$00060022
-        STD     %r29,-152(%r30) ;offset 0xa5c
-$D1
-        AND     %r5,%r19,%r24   ;offset 0xa60
-        EXTRD,U %r24,31,32,%r24 ;offset 0xa64
-        STW     %r2,-160(%r30)  ;offset 0xa68
-        STW     %r7,-128(%r30)  ;offset 0xa6c
-        FLDD    -152(%r30),%fr4 ;offset 0xa70
-        FLDD    -152(%r30),%fr7 ;offset 0xa74
-        FLDW    -160(%r30),%fr8L        ;offset 0xa78
-        FLDW    -128(%r30),%fr5L        ;offset 0xa7c
-        XMPYU   %fr8L,%fr7L,%fr10       ;offset 0xa80
-        FSTD    %fr10,-136(%r30)        ;offset 0xa84
-        XMPYU   %fr8L,%fr7R,%fr22       ;offset 0xa88
-        FSTD    %fr22,-144(%r30)        ;offset 0xa8c
-        XMPYU   %fr5L,%fr4L,%fr11       ;offset 0xa90
-        XMPYU   %fr5L,%fr4R,%fr23       ;offset 0xa94
-        FSTD    %fr11,-112(%r30)        ;offset 0xa98
-        FSTD    %fr23,-120(%r30)        ;offset 0xa9c
-        LDD     -136(%r30),%r28 ;offset 0xaa0
-        DEPD,Z  %r28,31,32,%r31 ;offset 0xaa4
-        LDD     -144(%r30),%r20 ;offset 0xaa8
-        ADD,L   %r20,%r31,%r31  ;offset 0xaac
-        LDD     -112(%r30),%r22 ;offset 0xab0
-        DEPD,Z  %r22,31,32,%r22 ;offset 0xab4
-        LDD     -120(%r30),%r21 ;offset 0xab8
-        B       $00060024       ;offset 0xabc
-        ADD,L   %r21,%r22,%r23  ;offset 0xac0
-$D0
-        OR      %r9,%r29,%r29   ;offset 0xac4
-$00060040
-        EXTRD,U %r29,31,32,%r28 ;offset 0xac8
-$00060002
-$L2
-        LDW     -212(%r30),%r2  ;offset 0xacc
-$D3
-        LDW     -168(%r30),%r9  ;offset 0xad0
-        LDD     -176(%r30),%r8  ;offset 0xad4
-        EXTRD,U %r8,31,32,%r7   ;offset 0xad8
-        LDD     -184(%r30),%r6  ;offset 0xadc
-        EXTRD,U %r6,31,32,%r5   ;offset 0xae0
-        LDW     -188(%r30),%r4  ;offset 0xae4
-        BVE     (%r2)   ;offset 0xae8
-        .EXIT
-        LDW,MB  -192(%r30),%r3  ;offset 0xaec
-	.PROCEND	;in=23,25;out=28,29;fpin=105,107;
-
-
-
-
-;----------------------------------------------------------------------------
-;
-; Registers to hold 64-bit values to manipulate.  The "L" part
-; of the register corresponds to the upper 32-bits, while the "R"
-; part corresponds to the lower 32-bits
-; 
-; Note, that when using b6 and b7, the code must save these before
-; using them because they are callee save registers 
-; 
-;
-; Floating point registers to use to save values that
-; are manipulated.  These don't collide with ftemp1-6 and
-; are all caller save registers
-;
-a0        .reg %fr22
-a0L       .reg %fr22L
-a0R       .reg %fr22R
-
-a1        .reg %fr23
-a1L       .reg %fr23L
-a1R       .reg %fr23R
-
-a2        .reg %fr24
-a2L       .reg %fr24L
-a2R       .reg %fr24R
-
-a3        .reg %fr25
-a3L       .reg %fr25L
-a3R       .reg %fr25R
-
-a4        .reg %fr26
-a4L       .reg %fr26L
-a4R       .reg %fr26R
-
-a5        .reg %fr27
-a5L       .reg %fr27L
-a5R       .reg %fr27R
-
-a6        .reg %fr28
-a6L       .reg %fr28L
-a6R       .reg %fr28R
-
-a7        .reg %fr29
-a7L       .reg %fr29L
-a7R       .reg %fr29R
-
-b0        .reg %fr30
-b0L       .reg %fr30L
-b0R       .reg %fr30R
-
-b1        .reg %fr31
-b1L       .reg %fr31L
-b1R       .reg %fr31R
-
-;
-; Temporary floating point variables, these are all caller save
-; registers
-;
-ftemp1    .reg %fr4
-ftemp2    .reg %fr5
-ftemp3    .reg %fr6
-ftemp4    .reg %fr7
-
-;
-; The B set of registers when used.
-;
-
-b2        .reg %fr8
-b2L       .reg %fr8L
-b2R       .reg %fr8R
-
-b3        .reg %fr9
-b3L       .reg %fr9L
-b3R       .reg %fr9R
-
-b4        .reg %fr10
-b4L       .reg %fr10L
-b4R       .reg %fr10R
-
-b5        .reg %fr11
-b5L       .reg %fr11L
-b5R       .reg %fr11R
-
-b6        .reg %fr12
-b6L       .reg %fr12L
-b6R       .reg %fr12R
-
-b7        .reg %fr13
-b7L       .reg %fr13L
-b7R       .reg %fr13R
-
-c1           .reg %r21   ; only reg
-temp1        .reg %r20   ; only reg
-temp2        .reg %r19   ; only reg
-temp3        .reg %r31   ; only reg
-
-m1           .reg %r28   
-c2           .reg %r23   
-high_one     .reg %r1
-ht           .reg %r6
-lt           .reg %r5
-m            .reg %r4
-c3           .reg %r3
-
-SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3
-    XMPYU   A0L,A0R,ftemp1       ; m
-    FSTD    ftemp1,-24(%sp)      ; store m
-
-    XMPYU   A0R,A0R,ftemp2       ; lt
-    FSTD    ftemp2,-16(%sp)      ; store lt
-
-    XMPYU   A0L,A0L,ftemp3       ; ht
-    FSTD    ftemp3,-8(%sp)       ; store ht
-
-    LDD     -24(%sp),m           ; load m
-    AND     m,high_mask,temp2    ; m & Mask
-    DEPD,Z  m,30,31,temp3        ; m << 32+1
-    LDD     -16(%sp),lt          ; lt
-
-    LDD     -8(%sp),ht           ; ht
-    EXTRD,U temp2,32,33,temp1    ; temp1 = m&Mask >> 32-1
-    ADD     temp3,lt,lt          ; lt = lt+m
-    ADD,L   ht,temp1,ht          ; ht += temp1
-    ADD,DC  ht,%r0,ht            ; ht++
-
-    ADD     C1,lt,C1             ; c1=c1+lt
-    ADD,DC  ht,%r0,ht            ; ht++
-
-    ADD     C2,ht,C2             ; c2=c2+ht
-    ADD,DC  C3,%r0,C3            ; c3++
-.endm
-
-SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3
-    XMPYU   A0L,A1R,ftemp1          ; m1 = bl*ht
-    FSTD    ftemp1,-16(%sp)         ;
-    XMPYU   A0R,A1L,ftemp2          ; m = bh*lt
-    FSTD    ftemp2,-8(%sp)          ;
-    XMPYU   A0R,A1R,ftemp3          ; lt = bl*lt
-    FSTD    ftemp3,-32(%sp)
-    XMPYU   A0L,A1L,ftemp4          ; ht = bh*ht
-    FSTD    ftemp4,-24(%sp)         ;
-
-    LDD     -8(%sp),m               ; r21 = m
-    LDD     -16(%sp),m1             ; r19 = m1
-    ADD,L   m,m1,m                  ; m+m1
-
-    DEPD,Z  m,31,32,temp3           ; (m+m1<<32)
-    LDD     -24(%sp),ht             ; r24 = ht
-
-    CMPCLR,*>>= m,m1,%r0            ; if (m < m1)
-    ADD,L   ht,high_one,ht          ; ht+=high_one
-
-    EXTRD,U m,31,32,temp1           ; m >> 32
-    LDD     -32(%sp),lt             ; lt
-    ADD,L   ht,temp1,ht             ; ht+= m>>32
-    ADD     lt,temp3,lt             ; lt = lt+m1
-    ADD,DC  ht,%r0,ht               ; ht++
-
-    ADD     ht,ht,ht                ; ht=ht+ht;
-    ADD,DC  C3,%r0,C3               ; add in carry (c3++)
-
-    ADD     lt,lt,lt                ; lt=lt+lt;
-    ADD,DC  ht,%r0,ht               ; add in carry (ht++)
-
-    ADD     C1,lt,C1                ; c1=c1+lt
-    ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++)
-    LDO     1(C3),C3              ; bump c3 if overflow,nullify otherwise
-
-    ADD     C2,ht,C2                ; c2 = c2 + ht
-    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
-.endm
-
-;
-;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba8
-	.PROC
-	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .ENTRY
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD     0(a_ptr),a0       
-    FLDD     8(a_ptr),a1       
-    FLDD    16(a_ptr),a2       
-    FLDD    24(a_ptr),a3       
-    FLDD    32(a_ptr),a4       
-    FLDD    40(a_ptr),a5       
-    FLDD    48(a_ptr),a6       
-    FLDD    56(a_ptr),a7       
-
-	SQR_ADD_C a0L,a0R,c1,c2,c3
-	STD     c1,0(r_ptr)          ; r[0] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-	STD     c2,8(r_ptr)          ; r[1] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-	STD     c3,16(r_ptr)            ; r[2] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
-	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-	STD     c1,24(r_ptr)           ; r[3] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
-	STD     c2,32(r_ptr)          ; r[4] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
-	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
-	STD     c3,40(r_ptr)          ; r[5] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C a3L,a3R,c1,c2,c3
-	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
-	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
-	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
-	STD     c1,48(r_ptr)          ; r[6] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
-	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
-	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
-	STD     c2,56(r_ptr)          ; r[7] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a4L,a4R,c3,c1,c2
-	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
-	SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
-	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
-	STD     c3,64(r_ptr)          ; r[8] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
-	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
-	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
-	STD     c1,72(r_ptr)          ; r[9] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a5L,a5R,c2,c3,c1
-	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
-	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
-	STD     c2,80(r_ptr)          ; r[10] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
-	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
-	STD     c3,88(r_ptr)          ; r[11] = c3;
-	COPY    %r0,c3
-	
-	SQR_ADD_C a6L,a6R,c1,c2,c3
-	SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
-	STD     c1,96(r_ptr)          ; r[12] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
-	STD     c2,104(r_ptr)         ; r[13] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a7L,a7R,c3,c1,c2
-	STD     c3, 112(r_ptr)       ; r[14] = c3
-	STD     c1, 120(r_ptr)       ; r[15] = c1
-
-    .EXIT
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-;-----------------------------------------------------------------------------
-;
-;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba4
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD     0(a_ptr),a0       
-    FLDD     8(a_ptr),a1       
-    FLDD    16(a_ptr),a2       
-    FLDD    24(a_ptr),a3       
-    FLDD    32(a_ptr),a4       
-    FLDD    40(a_ptr),a5       
-    FLDD    48(a_ptr),a6       
-    FLDD    56(a_ptr),a7       
-
-	SQR_ADD_C a0L,a0R,c1,c2,c3
-
-	STD     c1,0(r_ptr)          ; r[0] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-
-	STD     c2,8(r_ptr)          ; r[1] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-
-	STD     c3,16(r_ptr)            ; r[2] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
-	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-
-	STD     c1,24(r_ptr)           ; r[3] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-
-	STD     c2,32(r_ptr)           ; r[4] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
-	STD     c3,40(r_ptr)           ; r[5] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C a3L,a3R,c1,c2,c3
-	STD     c1,48(r_ptr)           ; r[6] = c1;
-	STD     c2,56(r_ptr)           ; r[7] = c2;
-
-    .EXIT
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-
-;---------------------------------------------------------------------------
-
-MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3
-    XMPYU   A0L,B0R,ftemp1        ; m1 = bl*ht
-    FSTD    ftemp1,-16(%sp)       ;
-    XMPYU   A0R,B0L,ftemp2        ; m = bh*lt
-    FSTD    ftemp2,-8(%sp)        ;
-    XMPYU   A0R,B0R,ftemp3        ; lt = bl*lt
-    FSTD    ftemp3,-32(%sp)
-    XMPYU   A0L,B0L,ftemp4        ; ht = bh*ht
-    FSTD    ftemp4,-24(%sp)       ;
-
-    LDD     -8(%sp),m             ; r21 = m
-    LDD     -16(%sp),m1           ; r19 = m1
-    ADD,L   m,m1,m                ; m+m1
-
-    DEPD,Z  m,31,32,temp3         ; (m+m1<<32)
-    LDD     -24(%sp),ht           ; r24 = ht
-
-    CMPCLR,*>>= m,m1,%r0          ; if (m < m1)
-    ADD,L   ht,high_one,ht        ; ht+=high_one
-
-    EXTRD,U m,31,32,temp1         ; m >> 32
-    LDD     -32(%sp),lt           ; lt
-    ADD,L   ht,temp1,ht           ; ht+= m>>32
-    ADD     lt,temp3,lt           ; lt = lt+m1
-    ADD,DC  ht,%r0,ht             ; ht++
-
-    ADD     C1,lt,C1              ; c1=c1+lt
-    ADD,DC  ht,%r0,ht             ; bump c3 if overflow,nullify otherwise
-
-    ADD     C2,ht,C2              ; c2 = c2 + ht
-    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
-.endm
-
-
-;
-;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba8
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-    FSTD    %fr12,32(%sp)       ; save r6
-    FSTD    %fr13,40(%sp)       ; save r7
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD      0(a_ptr),a0       
-    FLDD      8(a_ptr),a1       
-    FLDD     16(a_ptr),a2       
-    FLDD     24(a_ptr),a3       
-    FLDD     32(a_ptr),a4       
-    FLDD     40(a_ptr),a5       
-    FLDD     48(a_ptr),a6       
-    FLDD     56(a_ptr),a7       
-
-    FLDD      0(b_ptr),b0       
-    FLDD      8(b_ptr),b1       
-    FLDD     16(b_ptr),b2       
-    FLDD     24(b_ptr),b3       
-    FLDD     32(b_ptr),b4       
-    FLDD     40(b_ptr),b5       
-    FLDD     48(b_ptr),b6       
-    FLDD     56(b_ptr),b7       
-
-	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
-	STD       c1,0(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
-	STD       c2,8(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
-	STD       c3,16(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
-	STD       c1,24(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
-	STD       c2,32(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
-	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
-	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
-	STD       c3,40(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
-	STD       c1,48(r_ptr)
-	COPY      %r0,c1
-	
-	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
-	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
-	STD       c2,56(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
-	STD       c3,64(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
-	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
-	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
-	STD       c1,72(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
-	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
-	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
-	STD       c2,80(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
-	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
-	STD       c3,88(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
-	STD       c1,96(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
-	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
-	STD       c2,104(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
-	STD       c3,112(r_ptr)
-	STD       c1,120(r_ptr)
-
-    .EXIT
-    FLDD    -88(%sp),%fr13 
-    FLDD    -96(%sp),%fr12 
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-;-----------------------------------------------------------------------------
-;
-;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba4
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-    FSTD    %fr12,32(%sp)       ; save r6
-    FSTD    %fr13,40(%sp)       ; save r7
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD      0(a_ptr),a0       
-    FLDD      8(a_ptr),a1       
-    FLDD     16(a_ptr),a2       
-    FLDD     24(a_ptr),a3       
-
-    FLDD      0(b_ptr),b0       
-    FLDD      8(b_ptr),b1       
-    FLDD     16(b_ptr),b2       
-    FLDD     24(b_ptr),b3       
-
-	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
-	STD       c1,0(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
-	STD       c2,8(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
-	STD       c3,16(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
-	STD       c1,24(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
-	STD       c2,32(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
-	STD       c3,40(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
-	STD       c1,48(r_ptr)
-	STD       c2,56(r_ptr)
-
-    .EXIT
-    FLDD    -88(%sp),%fr13 
-    FLDD    -96(%sp),%fr12 
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-
-;--- not PIC	.SPACE	$TEXT$
-;--- not PIC	.SUBSPA	$CODE$
-;--- not PIC	.SPACE	$PRIVATE$,SORT=16
-;--- not PIC	.IMPORT	$global$,DATA
-;--- not PIC	.SPACE	$TEXT$
-;--- not PIC	.SUBSPA	$CODE$
-;--- not PIC	.SUBSPA	$LIT$,ACCESS=0x2c
-;--- not PIC	C$7
-;--- not PIC	.ALIGN	8
-;--- not PIC	.STRINGZ	"Division would overflow (%d)\n"
-	.END
diff --git a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s b/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
deleted file mode 100644
index 97381172e7..0000000000
--- a/deps/openssl/openssl/crypto/bn/asm/pa-risc2W.s
+++ /dev/null
@@ -1,1612 +0,0 @@
-; Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
-;
-; Licensed under the OpenSSL license (the "License").  You may not use
-; this file except in compliance with the License.  You can obtain a copy
-; in the file LICENSE in the source distribution or at
-; https://www.openssl.org/source/license.html
-
-;
-; PA-RISC 64-bit implementation of bn_asm code
-;
-; This code is approximately 2x faster than the C version
-; for RSA/DSA.
-;
-; See http://devresource.hp.com/  for more details on the PA-RISC
-; architecture.  Also see the book "PA-RISC 2.0 Architecture"
-; by Gerry Kane for information on the instruction set architecture.
-;
-; Code written by Chris Ruemmler (with some help from the HP C
-; compiler).
-;
-; The code compiles with HP's assembler
-;
-
-	.level	2.0W
-	.space	$TEXT$
-	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
-
-;
-; Global Register definitions used for the routines.
-;
-; Some information about HP's runtime architecture for 64-bits.
-;
-; "Caller save" means the calling function must save the register
-; if it wants the register to be preserved.
-; "Callee save" means if a function uses the register, it must save
-; the value before using it.
-;
-; For the floating point registers 
-;
-;    "caller save" registers: fr4-fr11, fr22-fr31
-;    "callee save" registers: fr12-fr21
-;    "special" registers: fr0-fr3 (status and exception registers)
-;
-; For the integer registers
-;     value zero             :  r0
-;     "caller save" registers: r1,r19-r26
-;     "callee save" registers: r3-r18
-;     return register        :  r2  (rp)
-;     return values          ; r28  (ret0,ret1)
-;     Stack pointer          ; r30  (sp) 
-;     global data pointer    ; r27  (dp)
-;     argument pointer       ; r29  (ap)
-;     millicode return ptr   ; r31  (also a caller save register)
-
-
-;
-; Arguments to the routines
-;
-r_ptr       .reg %r26
-a_ptr       .reg %r25
-b_ptr       .reg %r24
-num         .reg %r24
-w           .reg %r23
-n           .reg %r23
-
-
-;
-; Globals used in some routines
-;
-
-top_overflow .reg %r29
-high_mask    .reg %r22    ; value 0xffffffff80000000L
-
-
-;------------------------------------------------------------------------------
-;
-; bn_mul_add_words
-;
-;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, 
-;								int num, BN_ULONG w)
-;
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = num
-; arg3 = w
-;
-; Local register definitions
-;
-
-fm1          .reg %fr22
-fm           .reg %fr23
-ht_temp      .reg %fr24
-ht_temp_1    .reg %fr25
-lt_temp      .reg %fr26
-lt_temp_1    .reg %fr27
-fm1_1        .reg %fr28
-fm_1         .reg %fr29
-
-fw_h         .reg %fr7L
-fw_l         .reg %fr7R
-fw           .reg %fr7
-
-fht_0        .reg %fr8L
-flt_0        .reg %fr8R
-t_float_0    .reg %fr8
-
-fht_1        .reg %fr9L
-flt_1        .reg %fr9R
-t_float_1    .reg %fr9
-
-tmp_0        .reg %r31
-tmp_1        .reg %r21
-m_0          .reg %r20 
-m_1          .reg %r19 
-ht_0         .reg %r1  
-ht_1         .reg %r3
-lt_0         .reg %r4
-lt_1         .reg %r5
-m1_0         .reg %r6 
-m1_1         .reg %r7 
-rp_val       .reg %r8
-rp_val_1     .reg %r9
-
-bn_mul_add_words
-	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
-	.proc
-	.callinfo frame=128
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-	NOP                         ; Needed to make the loop 16-byte aligned
-	NOP                         ; Needed to make the loop 16-byte aligned
-
-    STD     %r5,16(%sp)         ; save r5  
-    STD     %r6,24(%sp)         ; save r6  
-    STD     %r7,32(%sp)         ; save r7  
-    STD     %r8,40(%sp)         ; save r8  
-
-    STD     %r9,48(%sp)         ; save r9  
-    COPY    %r0,%ret0           ; return 0 by default
-    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32    
-	STD     w,56(%sp)           ; store w on stack
-
-    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit
-	LDO     128(%sp),%sp       ; bump stack
-
-	;
-	; The loop is unrolled twice, so if there is only 1 number
-    ; then go straight to the cleanup code.
-	;
-	CMPIB,= 1,num,bn_mul_add_words_single_top
-	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l)
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-    ; two 32-bit mutiplies can be issued per cycle.
-    ; 
-bn_mul_add_words_unroll2
-
-    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    LDD     0(r_ptr),rp_val          ; rp[0]
-    LDD     8(r_ptr),rp_val_1        ; rp[1]
-
-    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l
-    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l
-    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0]
-    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1]
-
-    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h
-    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h
-    FSTD    fm,-8(%sp)               ; -8(sp) = m[0]
-    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1]
-
-    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h
-    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h
-    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp
-    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1
-
-    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp = lt*fw_l
-    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp 
-    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1 
-
-    LDD     -8(%sp),m_0              ; m[0] 
-    LDD     -40(%sp),m_1             ; m[1]
-    LDD     -16(%sp),m1_0            ; m1[0]
-    LDD     -48(%sp),m1_1            ; m1[1]
-
-    LDD     -24(%sp),ht_0            ; ht[0]
-    LDD     -56(%sp),ht_1            ; ht[1]
-    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0]; 
-    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1]; 
-
-    LDD     -32(%sp),lt_0            
-    LDD     -64(%sp),lt_1            
-    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (m[0] < m1[0])
-    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32)
-
-    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (m[1] < m1[1])
-    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32)
-    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32  
-    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32 
-
-    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32  
-    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32 
-    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32)
-    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32)
-
-    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0];
-	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1];
-    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++
-
-    ADD    %ret0,lt_0,lt_0           ; lt[0] = lt[0] + c;
-	ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0]
-    ADD,DC  ht_0,%r0,ht_0            ; ht[0]++
-
-	LDO    -2(num),num               ; num = num - 2;
-    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c);
-    ADD,DC  ht_1,%r0,ht_1            ; ht[1]++
-    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0]
-
-    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1]
-    ADD,DC  ht_1,%r0,%ret0           ; ht[1]++
-    LDO     16(a_ptr),a_ptr          ; a_ptr += 2
-
-    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1]
-	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
-    LDO     16(r_ptr),r_ptr          ; r_ptr += 2
-
-    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_mul_add_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    LDD     0(r_ptr),rp_val           ; rp[0]
-    LDO     8(a_ptr),a_ptr            ; a_ptr++
-    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-
-    LDD     -8(%sp),m_0               
-    LDD    -16(%sp),m1_0              ; m1 = temp1 
-    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1; 
-    LDD     -24(%sp),ht_0             
-    LDD     -32(%sp),lt_0             
-
-    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-    ADD     %ret0,tmp_0,lt_0          ; lt = lt + c;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0]
-    ADD,DC  ht_0,%r0,%ret0            ; ht++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-
-bn_mul_add_words_exit
-    .EXIT
-    LDD     -80(%sp),%r9              ; restore r9  
-    LDD     -88(%sp),%r8              ; restore r8  
-    LDD     -96(%sp),%r7              ; restore r7  
-    LDD     -104(%sp),%r6             ; restore r6  
-    LDD     -112(%sp),%r5             ; restore r5  
-    LDD     -120(%sp),%r4             ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3             ; restore r3
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-; arg3 = w
-
-bn_mul_words
-	.proc
-	.callinfo frame=128
-    .entry
-	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-    STD     %r5,16(%sp)         ; save r5  
-    STD     %r6,24(%sp)         ; save r6  
-
-    STD     %r7,32(%sp)         ; save r7  
-    COPY    %r0,%ret0           ; return 0 by default
-    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32    
-	STD     w,56(%sp)           ; w on stack
-
-    CMPIB,>= 0,num,bn_mul_words_exit
-	LDO     128(%sp),%sp       ; bump stack
-
-	;
-	; See if only 1 word to do, thus just do cleanup
-	;
-	CMPIB,= 1,num,bn_mul_words_single_top
-	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l)
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
-    ; two 32-bit mutiplies can be issued per cycle.
-    ; 
-bn_mul_words_unroll2
-
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l
-    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = ht*fw_l
-
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    XMPYU   flt_1,fw_h,fm_1           ; m = lt*fw_h
-
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    FSTD    fm_1,-40(%sp)             ; -40(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = fht_0*fw_h
-    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp = ht*fw_h
-
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp = lt*fw_l
-
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt 
-    LDD     -8(%sp),m_0               
-    LDD     -40(%sp),m_1              
-
-    LDD    -16(%sp),m1_0              
-    LDD    -48(%sp),m1_1              
-    LDD     -24(%sp),ht_0             
-    LDD     -56(%sp),ht_1             
-
-    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m + m1; 
-    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m + m1; 
-    LDD     -32(%sp),lt_0             
-    LDD     -64(%sp),lt_1             
-
-    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (m < m1)
-    ADD,L   ht_1,top_overflow,ht_1    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-    EXTRD,U tmp_1,31,32,m_1           ; m>>32  
-    DEPD,Z  tmp_1,31,32,m1_1          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD,L   ht_1,m_1,ht_1             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,lt_0            ; lt = lt+m1;
-	ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     lt_1,m1_1,lt_1            ; lt = lt+m1;
-    ADD,DC  ht_1,%r0,ht_1             ; ht++
-    ADD    %ret0,lt_0,lt_0            ; lt = lt + c (ret0);
-	ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     ht_0,lt_1,lt_1            ; lt = lt + c (ht_0)
-    ADD,DC  ht_1,%r0,ht_1             ; ht++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-    STD     lt_1,8(r_ptr)             ; rp[1] = lt
-
-	COPY    ht_1,%ret0                ; carry = ht
-	LDO    -2(num),num                ; num = num - 2;
-    LDO     16(a_ptr),a_ptr           ; ap += 2
-	CMPIB,<= 2,num,bn_mul_words_unroll2
-    LDO     16(r_ptr),r_ptr           ; rp++
-
-    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_mul_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
-    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
-    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
-    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
-    FSTD    fm,-8(%sp)                ; -8(sp) = m
-    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
-    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
-    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
-    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt 
-
-    LDD     -8(%sp),m_0               
-    LDD    -16(%sp),m1_0              
-    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1; 
-    LDD     -24(%sp),ht_0             
-    LDD     -32(%sp),lt_0             
-
-    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (m < m1)
-    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
-
-    EXTRD,U tmp_0,31,32,m_0           ; m>>32  
-    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32 
-
-    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
-    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    ADD     %ret0,lt_0,lt_0           ; lt = lt + c;
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    COPY    ht_0,%ret0                ; copy carry
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-
-bn_mul_words_exit
-    .EXIT
-    LDD     -96(%sp),%r7              ; restore r7  
-    LDD     -104(%sp),%r6             ; restore r6  
-    LDD     -112(%sp),%r5             ; restore r5  
-    LDD     -120(%sp),%r4             ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3             ; restore r3
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
-;
-; arg0 = rp
-; arg1 = ap
-; arg2 = num
-;
-
-bn_sqr_words
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3  
-    STD     %r4,8(%sp)          ; save r4  
-	NOP
-    STD     %r5,16(%sp)         ; save r5  
-
-    CMPIB,>= 0,num,bn_sqr_words_exit
-	LDO     128(%sp),%sp       ; bump stack
-
-	;
-	; If only 1, the goto straight to cleanup
-	;
-	CMPIB,= 1,num,bn_sqr_words_single_top
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-
-bn_sqr_words_unroll2
-    FLDD    0(a_ptr),t_float_0        ; a[0]
-    FLDD    8(a_ptr),t_float_1        ; a[1]
-    XMPYU   fht_0,flt_0,fm            ; m[0]
-    XMPYU   fht_1,flt_1,fm_1          ; m[1]
-
-    FSTD    fm,-24(%sp)               ; store m[0]
-    FSTD    fm_1,-56(%sp)             ; store m[1]
-    XMPYU   flt_0,flt_0,lt_temp       ; lt[0]
-    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1]
-
-    FSTD    lt_temp,-16(%sp)          ; store lt[0]
-    FSTD    lt_temp_1,-48(%sp)        ; store lt[1]
-    XMPYU   fht_0,fht_0,ht_temp       ; ht[0]
-    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1]
-
-    FSTD    ht_temp,-8(%sp)           ; store ht[0]
-    FSTD    ht_temp_1,-40(%sp)        ; store ht[1]
-    LDD     -24(%sp),m_0             
-    LDD     -56(%sp),m_1              
-
-    AND     m_0,high_mask,tmp_0       ; m[0] & Mask
-    AND     m_1,high_mask,tmp_1       ; m[1] & Mask
-    DEPD,Z  m_0,30,31,m_0             ; m[0] << 32+1
-    DEPD,Z  m_1,30,31,m_1             ; m[1] << 32+1
-
-    LDD     -16(%sp),lt_0        
-    LDD     -48(%sp),lt_1        
-    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1
-    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1
-
-    LDD     -8(%sp),ht_0            
-    LDD     -40(%sp),ht_1           
-    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0
-    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1
-
-    ADD     lt_0,m_0,lt_0             ; lt = lt+m
-    ADD,DC  ht_0,%r0,ht_0             ; ht[0]++
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0]
-    STD     ht_0,8(r_ptr)             ; rp[1] = ht[1]
-
-    ADD     lt_1,m_1,lt_1             ; lt = lt+m
-    ADD,DC  ht_1,%r0,ht_1             ; ht[1]++
-    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1]
-    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1]
-
-	LDO    -2(num),num                ; num = num - 2;
-    LDO     16(a_ptr),a_ptr           ; ap += 2
-	CMPIB,<= 2,num,bn_sqr_words_unroll2
-    LDO     32(r_ptr),r_ptr           ; rp += 4
-
-    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
-
-	;
-	; Top of loop aligned on 64-byte boundary
-	;
-bn_sqr_words_single_top
-    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
-
-    XMPYU   fht_0,flt_0,fm            ; m
-    FSTD    fm,-24(%sp)               ; store m
-
-    XMPYU   flt_0,flt_0,lt_temp       ; lt
-    FSTD    lt_temp,-16(%sp)          ; store lt
-
-    XMPYU   fht_0,fht_0,ht_temp       ; ht
-    FSTD    ht_temp,-8(%sp)           ; store ht
-
-    LDD     -24(%sp),m_0              ; load m
-    AND     m_0,high_mask,tmp_0       ; m & Mask
-    DEPD,Z  m_0,30,31,m_0             ; m << 32+1
-    LDD     -16(%sp),lt_0             ; lt
-
-    LDD     -8(%sp),ht_0              ; ht
-    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1
-    ADD     m_0,lt_0,lt_0             ; lt = lt+m
-    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0
-    ADD,DC  ht_0,%r0,ht_0             ; ht++
-
-    STD     lt_0,0(r_ptr)             ; rp[0] = lt
-    STD     ht_0,8(r_ptr)             ; rp[1] = ht
-
-bn_sqr_words_exit
-    .EXIT
-    LDD     -112(%sp),%r5       ; restore r5  
-    LDD     -120(%sp),%r4       ; restore r4  
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3 
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp 
-; arg1 = ap
-; arg2 = bp 
-; arg3 = n
-
-t  .reg %r22
-b  .reg %r21
-l  .reg %r20
-
-bn_add_words
-	.proc
-    .entry
-	.callinfo
-	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-	.align 64
-
-    CMPIB,>= 0,n,bn_add_words_exit
-    COPY    %r0,%ret0           ; return 0 by default
-
-	;
-	; If 2 or more numbers do the loop
-	;
-	CMPIB,= 1,n,bn_add_words_single_top
-	NOP
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-bn_add_words_unroll2
-	LDD     0(a_ptr),t
-	LDD     0(b_ptr),b
-	ADD     t,%ret0,t                    ; t = t+c;
-	ADD,DC  %r0,%r0,%ret0                ; set c to carry
-	ADD     t,b,l                        ; l = t + b[0]
-	ADD,DC  %ret0,%r0,%ret0              ; c+= carry
-	STD     l,0(r_ptr)
-
-	LDD     8(a_ptr),t
-	LDD     8(b_ptr),b
-	ADD     t,%ret0,t                     ; t = t+c;
-	ADD,DC  %r0,%r0,%ret0                 ; set c to carry
-	ADD     t,b,l                         ; l = t + b[0]
-	ADD,DC  %ret0,%r0,%ret0               ; c+= carry
-	STD     l,8(r_ptr)
-
-	LDO     -2(n),n
-	LDO     16(a_ptr),a_ptr
-	LDO     16(b_ptr),b_ptr
-
-	CMPIB,<= 2,n,bn_add_words_unroll2
-	LDO     16(r_ptr),r_ptr
-
-    CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
-
-bn_add_words_single_top
-	LDD     0(a_ptr),t
-	LDD     0(b_ptr),b
-
-	ADD     t,%ret0,t                 ; t = t+c;
-	ADD,DC  %r0,%r0,%ret0             ; set c to carry (could use CMPCLR??)
-	ADD     t,b,l                     ; l = t + b[0]
-	ADD,DC  %ret0,%r0,%ret0           ; c+= carry
-	STD     l,0(r_ptr)
-
-bn_add_words_exit
-    .EXIT
-    BVE     (%rp)
-	NOP
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
-;
-; arg0 = rp 
-; arg1 = ap
-; arg2 = bp 
-; arg3 = n
-
-t1       .reg %r22
-t2       .reg %r21
-sub_tmp1 .reg %r20
-sub_tmp2 .reg %r19
-
-
-bn_sub_words
-	.proc
-	.callinfo 
-	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    CMPIB,>=  0,n,bn_sub_words_exit
-    COPY    %r0,%ret0           ; return 0 by default
-
-	;
-	; If 2 or more numbers do the loop
-	;
-	CMPIB,= 1,n,bn_sub_words_single_top
-	NOP
-
-	;
-	; This loop is unrolled 2 times (64-byte aligned as well)
-	;
-bn_sub_words_unroll2
-	LDD     0(a_ptr),t1
-	LDD     0(b_ptr),t2
-	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret0,sub_tmp1  ; t3 = t3- c; 
-
-	CMPCLR,*>> t1,t2,sub_tmp2        ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret0
-	STD     sub_tmp1,0(r_ptr)
-
-	LDD     8(a_ptr),t1
-	LDD     8(b_ptr),t2
-	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c; 
-	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret0
-	STD     sub_tmp1,8(r_ptr)
-
-	LDO     -2(n),n
-	LDO     16(a_ptr),a_ptr
-	LDO     16(b_ptr),b_ptr
-
-	CMPIB,<= 2,n,bn_sub_words_unroll2
-	LDO     16(r_ptr),r_ptr
-
-    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
-
-bn_sub_words_single_top
-	LDD     0(a_ptr),t1
-	LDD     0(b_ptr),t2
-	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
-	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c; 
-	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
-	LDO      1(%r0),sub_tmp2
-	
-	CMPCLR,*= t1,t2,%r0
-	COPY    sub_tmp2,%ret0
-
-	STD     sub_tmp1,0(r_ptr)
-
-bn_sub_words_exit
-    .EXIT
-    BVE     (%rp)
-	NOP
-	.PROCEND	;in=23,24,25,26,29;out=28;
-
-;------------------------------------------------------------------------------
-;
-; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
-;
-; arg0 = h
-; arg1 = l
-; arg2 = d
-;
-; This is mainly just modified assembly from the compiler, thus the
-; lack of variable names.
-;
-;------------------------------------------------------------------------------
-bn_div_words
-	.proc
-	.callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-	.IMPORT	BN_num_bits_word,CODE,NO_RELOCATION
-	.IMPORT	__iob,DATA
-	.IMPORT	fprintf,CODE,NO_RELOCATION
-	.IMPORT	abort,CODE,NO_RELOCATION
-	.IMPORT	$$div2U,MILLICODE
-    .entry
-    STD     %r2,-16(%r30)   
-    STD,MA  %r3,352(%r30)   
-    STD     %r4,-344(%r30)  
-    STD     %r5,-336(%r30)  
-    STD     %r6,-328(%r30)  
-    STD     %r7,-320(%r30)  
-    STD     %r8,-312(%r30)  
-    STD     %r9,-304(%r30)  
-    STD     %r10,-296(%r30)
-
-    STD     %r27,-288(%r30)             ; save gp
-
-    COPY    %r24,%r3           ; save d 
-    COPY    %r26,%r4           ; save h (high 64-bits)
-    LDO      -1(%r0),%ret0     ; return -1 by default	
-
-    CMPB,*=  %r0,%arg2,$D3     ; if (d == 0)
-    COPY    %r25,%r5           ; save l (low 64-bits)
-
-    LDO     -48(%r30),%r29     ; create ap 
-    .CALL   ;in=26,29;out=28;
-    B,L     BN_num_bits_word,%r2 
-    COPY    %r3,%r26        
-    LDD     -288(%r30),%r27    ; restore gp 
-    LDI     64,%r21 
-
-    CMPB,=  %r21,%ret0,$00000012   ;if (i == 64) (forward) 
-    COPY    %ret0,%r24             ; i   
-    MTSARCM %r24    
-    DEPDI,Z -1,%sar,1,%r29  
-    CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward) 
-
-$00000012
-    SUBI    64,%r24,%r31                       ; i = 64 - i;
-    CMPCLR,*<< %r4,%r3,%r0                     ; if (h >= d)
-    SUB     %r4,%r3,%r4                        ; h -= d
-    CMPB,=  %r31,%r0,$0000001A                 ; if (i)
-    COPY    %r0,%r10                           ; ret = 0
-    MTSARCM %r31                               ; i to shift
-    DEPD,Z  %r3,%sar,64,%r3                    ; d <<= i;
-    SUBI    64,%r31,%r19                       ; 64 - i; redundent
-    MTSAR   %r19                               ; (64 -i) to shift
-    SHRPD   %r4,%r5,%sar,%r4                   ; l>> (64-i)
-    MTSARCM %r31                               ; i to shift
-    DEPD,Z  %r5,%sar,64,%r5                    ; l <<= i;
-
-$0000001A
-    DEPDI,Z -1,31,32,%r19                      
-    EXTRD,U %r3,31,32,%r6                      ; dh=(d&0xfff)>>32
-    EXTRD,U %r3,63,32,%r8                      ; dl = d&0xffffff
-    LDO     2(%r0),%r9
-    STD    %r3,-280(%r30)                      ; "d" to stack
-
-$0000001C
-    DEPDI,Z -1,63,32,%r29                      ; 
-    EXTRD,U %r4,31,32,%r31                     ; h >> 32
-    CMPB,*=,N  %r31,%r6,$D2     	       ; if ((h>>32) != dh)(forward) div
-    COPY    %r4,%r26       
-    EXTRD,U %r4,31,32,%r25 
-    COPY    %r6,%r24      
-    .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
-    B,L     $$div2U,%r2     
-    EXTRD,U %r6,31,32,%r23  
-    DEPD    %r28,31,32,%r29 
-$D2
-    STD     %r29,-272(%r30)                   ; q
-    AND     %r5,%r19,%r24                   ; t & 0xffffffff00000000;
-    EXTRD,U %r24,31,32,%r24                 ; ??? 
-    FLDD    -272(%r30),%fr7                 ; q
-    FLDD    -280(%r30),%fr8                 ; d
-    XMPYU   %fr8L,%fr7L,%fr10  
-    FSTD    %fr10,-256(%r30)   
-    XMPYU   %fr8L,%fr7R,%fr22  
-    FSTD    %fr22,-264(%r30)   
-    XMPYU   %fr8R,%fr7L,%fr11 
-    XMPYU   %fr8R,%fr7R,%fr23
-    FSTD    %fr11,-232(%r30)
-    FSTD    %fr23,-240(%r30)
-    LDD     -256(%r30),%r28
-    DEPD,Z  %r28,31,32,%r2 
-    LDD     -264(%r30),%r20
-    ADD,L   %r20,%r2,%r31   
-    LDD     -232(%r30),%r22 
-    DEPD,Z  %r22,31,32,%r22 
-    LDD     -240(%r30),%r21 
-    B       $00000024       ; enter loop  
-    ADD,L   %r21,%r22,%r23 
-
-$0000002A
-    LDO     -1(%r29),%r29   
-    SUB     %r23,%r8,%r23   
-$00000024
-    SUB     %r4,%r31,%r25   
-    AND     %r25,%r19,%r26  
-    CMPB,*<>,N      %r0,%r26,$00000046  ; (forward)
-    DEPD,Z  %r25,31,32,%r20 
-    OR      %r20,%r24,%r21  
-    CMPB,*<<,N  %r21,%r23,$0000002A ;(backward) 
-    SUB     %r31,%r6,%r31   
-;-------------Break path---------------------
-
-$00000046
-    DEPD,Z  %r23,31,32,%r25              ;tl
-    EXTRD,U %r23,31,32,%r26              ;t
-    AND     %r25,%r19,%r24               ;tl = (tl<<32)&0xfffffff0000000L
-    ADD,L   %r31,%r26,%r31               ;th += t; 
-    CMPCLR,*>>=     %r5,%r24,%r0         ;if (l<tl)
-    LDO     1(%r31),%r31                 ; th++;
-    CMPB,*<<=,N     %r31,%r4,$00000036   ;if (n < th) (forward)
-    LDO     -1(%r29),%r29                ;q--; 
-    ADD,L   %r4,%r3,%r4                  ;h += d;
-$00000036
-    ADDIB,=,N       -1,%r9,$D1 ;if (--count == 0) break (forward) 
-    SUB     %r5,%r24,%r28                ; l -= tl;
-    SUB     %r4,%r31,%r24                ; h -= th;
-    SHRPD   %r24,%r28,32,%r4             ; h = ((h<<32)|(l>>32));
-    DEPD,Z  %r29,31,32,%r10              ; ret = q<<32
-    b      $0000001C
-    DEPD,Z  %r28,31,32,%r5               ; l = l << 32 
-
-$D1
-    OR      %r10,%r29,%r28           ; ret |= q
-$D3
-    LDD     -368(%r30),%r2  
-$D0
-    LDD     -296(%r30),%r10 
-    LDD     -304(%r30),%r9  
-    LDD     -312(%r30),%r8  
-    LDD     -320(%r30),%r7  
-    LDD     -328(%r30),%r6  
-    LDD     -336(%r30),%r5  
-    LDD     -344(%r30),%r4  
-    BVE     (%r2)   
-        .EXIT
-    LDD,MB  -352(%r30),%r3 
-
-bn_div_err_case
-    MFIA    %r6     
-    ADDIL   L'bn_div_words-bn_div_err_case,%r6,%r1 
-    LDO     R'bn_div_words-bn_div_err_case(%r1),%r6  
-    ADDIL   LT'__iob,%r27,%r1       
-    LDD     RT'__iob(%r1),%r26      
-    ADDIL   L'C$4-bn_div_words,%r6,%r1    
-    LDO     R'C$4-bn_div_words(%r1),%r25  
-    LDO     64(%r26),%r26   
-    .CALL           ;in=24,25,26,29;out=28;
-    B,L     fprintf,%r2    
-    LDO     -48(%r30),%r29 
-    LDD     -288(%r30),%r27
-    .CALL           ;in=29;
-    B,L     abort,%r2      
-    LDO     -48(%r30),%r29 
-    LDD     -288(%r30),%r27
-    B       $D0         
-    LDD     -368(%r30),%r2  
-	.PROCEND	;in=24,25,26,29;out=28;
-
-;----------------------------------------------------------------------------
-;
-; Registers to hold 64-bit values to manipulate.  The "L" part
-; of the register corresponds to the upper 32-bits, while the "R"
-; part corresponds to the lower 32-bits
-; 
-; Note, that when using b6 and b7, the code must save these before
-; using them because they are callee save registers 
-; 
-;
-; Floating point registers to use to save values that
-; are manipulated.  These don't collide with ftemp1-6 and
-; are all caller save registers
-;
-a0        .reg %fr22
-a0L       .reg %fr22L
-a0R       .reg %fr22R
-
-a1        .reg %fr23
-a1L       .reg %fr23L
-a1R       .reg %fr23R
-
-a2        .reg %fr24
-a2L       .reg %fr24L
-a2R       .reg %fr24R
-
-a3        .reg %fr25
-a3L       .reg %fr25L
-a3R       .reg %fr25R
-
-a4        .reg %fr26
-a4L       .reg %fr26L
-a4R       .reg %fr26R
-
-a5        .reg %fr27
-a5L       .reg %fr27L
-a5R       .reg %fr27R
-
-a6        .reg %fr28
-a6L       .reg %fr28L
-a6R       .reg %fr28R
-
-a7        .reg %fr29
-a7L       .reg %fr29L
-a7R       .reg %fr29R
-
-b0        .reg %fr30
-b0L       .reg %fr30L
-b0R       .reg %fr30R
-
-b1        .reg %fr31
-b1L       .reg %fr31L
-b1R       .reg %fr31R
-
-;
-; Temporary floating point variables, these are all caller save
-; registers
-;
-ftemp1    .reg %fr4
-ftemp2    .reg %fr5
-ftemp3    .reg %fr6
-ftemp4    .reg %fr7
-
-;
-; The B set of registers when used.
-;
-
-b2        .reg %fr8
-b2L       .reg %fr8L
-b2R       .reg %fr8R
-
-b3        .reg %fr9
-b3L       .reg %fr9L
-b3R       .reg %fr9R
-
-b4        .reg %fr10
-b4L       .reg %fr10L
-b4R       .reg %fr10R
-
-b5        .reg %fr11
-b5L       .reg %fr11L
-b5R       .reg %fr11R
-
-b6        .reg %fr12
-b6L       .reg %fr12L
-b6R       .reg %fr12R
-
-b7        .reg %fr13
-b7L       .reg %fr13L
-b7R       .reg %fr13R
-
-c1           .reg %r21   ; only reg
-temp1        .reg %r20   ; only reg
-temp2        .reg %r19   ; only reg
-temp3        .reg %r31   ; only reg
-
-m1           .reg %r28   
-c2           .reg %r23   
-high_one     .reg %r1
-ht           .reg %r6
-lt           .reg %r5
-m            .reg %r4
-c3           .reg %r3
-
-SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3
-    XMPYU   A0L,A0R,ftemp1       ; m
-    FSTD    ftemp1,-24(%sp)      ; store m
-
-    XMPYU   A0R,A0R,ftemp2       ; lt
-    FSTD    ftemp2,-16(%sp)      ; store lt
-
-    XMPYU   A0L,A0L,ftemp3       ; ht
-    FSTD    ftemp3,-8(%sp)       ; store ht
-
-    LDD     -24(%sp),m           ; load m
-    AND     m,high_mask,temp2    ; m & Mask
-    DEPD,Z  m,30,31,temp3        ; m << 32+1
-    LDD     -16(%sp),lt          ; lt
-
-    LDD     -8(%sp),ht           ; ht
-    EXTRD,U temp2,32,33,temp1    ; temp1 = m&Mask >> 32-1
-    ADD     temp3,lt,lt          ; lt = lt+m
-    ADD,L   ht,temp1,ht          ; ht += temp1
-    ADD,DC  ht,%r0,ht            ; ht++
-
-    ADD     C1,lt,C1             ; c1=c1+lt
-    ADD,DC  ht,%r0,ht            ; ht++
-
-    ADD     C2,ht,C2             ; c2=c2+ht
-    ADD,DC  C3,%r0,C3            ; c3++
-.endm
-
-SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3
-    XMPYU   A0L,A1R,ftemp1          ; m1 = bl*ht
-    FSTD    ftemp1,-16(%sp)         ;
-    XMPYU   A0R,A1L,ftemp2          ; m = bh*lt
-    FSTD    ftemp2,-8(%sp)          ;
-    XMPYU   A0R,A1R,ftemp3          ; lt = bl*lt
-    FSTD    ftemp3,-32(%sp)
-    XMPYU   A0L,A1L,ftemp4          ; ht = bh*ht
-    FSTD    ftemp4,-24(%sp)         ;
-
-    LDD     -8(%sp),m               ; r21 = m
-    LDD     -16(%sp),m1             ; r19 = m1
-    ADD,L   m,m1,m                  ; m+m1
-
-    DEPD,Z  m,31,32,temp3           ; (m+m1<<32)
-    LDD     -24(%sp),ht             ; r24 = ht
-
-    CMPCLR,*>>= m,m1,%r0            ; if (m < m1)
-    ADD,L   ht,high_one,ht          ; ht+=high_one
-
-    EXTRD,U m,31,32,temp1           ; m >> 32
-    LDD     -32(%sp),lt             ; lt
-    ADD,L   ht,temp1,ht             ; ht+= m>>32
-    ADD     lt,temp3,lt             ; lt = lt+m1
-    ADD,DC  ht,%r0,ht               ; ht++
-
-    ADD     ht,ht,ht                ; ht=ht+ht;
-    ADD,DC  C3,%r0,C3               ; add in carry (c3++)
-
-    ADD     lt,lt,lt                ; lt=lt+lt;
-    ADD,DC  ht,%r0,ht               ; add in carry (ht++)
-
-    ADD     C1,lt,C1                ; c1=c1+lt
-    ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++)
-    LDO     1(C3),C3              ; bump c3 if overflow,nullify otherwise
-
-    ADD     C2,ht,C2                ; c2 = c2 + ht
-    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
-.endm
-
-;
-;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba8
-	.PROC
-	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .ENTRY
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD     0(a_ptr),a0       
-    FLDD     8(a_ptr),a1       
-    FLDD    16(a_ptr),a2       
-    FLDD    24(a_ptr),a3       
-    FLDD    32(a_ptr),a4       
-    FLDD    40(a_ptr),a5       
-    FLDD    48(a_ptr),a6       
-    FLDD    56(a_ptr),a7       
-
-	SQR_ADD_C a0L,a0R,c1,c2,c3
-	STD     c1,0(r_ptr)          ; r[0] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-	STD     c2,8(r_ptr)          ; r[1] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-	STD     c3,16(r_ptr)            ; r[2] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
-	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-	STD     c1,24(r_ptr)           ; r[3] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
-	STD     c2,32(r_ptr)          ; r[4] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
-	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
-	STD     c3,40(r_ptr)          ; r[5] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C a3L,a3R,c1,c2,c3
-	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
-	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
-	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
-	STD     c1,48(r_ptr)          ; r[6] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
-	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
-	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
-	STD     c2,56(r_ptr)          ; r[7] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a4L,a4R,c3,c1,c2
-	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
-	SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
-	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
-	STD     c3,64(r_ptr)          ; r[8] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
-	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
-	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
-	STD     c1,72(r_ptr)          ; r[9] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a5L,a5R,c2,c3,c1
-	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
-	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
-	STD     c2,80(r_ptr)          ; r[10] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
-	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
-	STD     c3,88(r_ptr)          ; r[11] = c3;
-	COPY    %r0,c3
-	
-	SQR_ADD_C a6L,a6R,c1,c2,c3
-	SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
-	STD     c1,96(r_ptr)          ; r[12] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
-	STD     c2,104(r_ptr)         ; r[13] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a7L,a7R,c3,c1,c2
-	STD     c3, 112(r_ptr)       ; r[14] = c3
-	STD     c1, 120(r_ptr)       ; r[15] = c1
-
-    .EXIT
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-;-----------------------------------------------------------------------------
-;
-;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
-; arg0 = r_ptr
-; arg1 = a_ptr
-;
-
-bn_sqr_comba4
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD     0(a_ptr),a0       
-    FLDD     8(a_ptr),a1       
-    FLDD    16(a_ptr),a2       
-    FLDD    24(a_ptr),a3       
-    FLDD    32(a_ptr),a4       
-    FLDD    40(a_ptr),a5       
-    FLDD    48(a_ptr),a6       
-    FLDD    56(a_ptr),a7       
-
-	SQR_ADD_C a0L,a0R,c1,c2,c3
-
-	STD     c1,0(r_ptr)          ; r[0] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
-
-	STD     c2,8(r_ptr)          ; r[1] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C a1L,a1R,c3,c1,c2
-	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
-
-	STD     c3,16(r_ptr)            ; r[2] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
-	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
-
-	STD     c1,24(r_ptr)           ; r[3] = c1;
-	COPY    %r0,c1
-
-	SQR_ADD_C a2L,a2R,c2,c3,c1
-	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
-
-	STD     c2,32(r_ptr)           ; r[4] = c2;
-	COPY    %r0,c2
-
-	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
-	STD     c3,40(r_ptr)           ; r[5] = c3;
-	COPY    %r0,c3
-
-	SQR_ADD_C a3L,a3R,c1,c2,c3
-	STD     c1,48(r_ptr)           ; r[6] = c1;
-	STD     c2,56(r_ptr)           ; r[7] = c2;
-
-    .EXIT
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-
-;---------------------------------------------------------------------------
-
-MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3
-    XMPYU   A0L,B0R,ftemp1        ; m1 = bl*ht
-    FSTD    ftemp1,-16(%sp)       ;
-    XMPYU   A0R,B0L,ftemp2        ; m = bh*lt
-    FSTD    ftemp2,-8(%sp)        ;
-    XMPYU   A0R,B0R,ftemp3        ; lt = bl*lt
-    FSTD    ftemp3,-32(%sp)
-    XMPYU   A0L,B0L,ftemp4        ; ht = bh*ht
-    FSTD    ftemp4,-24(%sp)       ;
-
-    LDD     -8(%sp),m             ; r21 = m
-    LDD     -16(%sp),m1           ; r19 = m1
-    ADD,L   m,m1,m                ; m+m1
-
-    DEPD,Z  m,31,32,temp3         ; (m+m1<<32)
-    LDD     -24(%sp),ht           ; r24 = ht
-
-    CMPCLR,*>>= m,m1,%r0          ; if (m < m1)
-    ADD,L   ht,high_one,ht        ; ht+=high_one
-
-    EXTRD,U m,31,32,temp1         ; m >> 32
-    LDD     -32(%sp),lt           ; lt
-    ADD,L   ht,temp1,ht           ; ht+= m>>32
-    ADD     lt,temp3,lt           ; lt = lt+m1
-    ADD,DC  ht,%r0,ht             ; ht++
-
-    ADD     C1,lt,C1              ; c1=c1+lt
-    ADD,DC  ht,%r0,ht             ; bump c3 if overflow,nullify otherwise
-
-    ADD     C2,ht,C2              ; c2 = c2 + ht
-    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
-.endm
-
-
-;
-;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba8
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-    FSTD    %fr12,32(%sp)       ; save r6
-    FSTD    %fr13,40(%sp)       ; save r7
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD      0(a_ptr),a0       
-    FLDD      8(a_ptr),a1       
-    FLDD     16(a_ptr),a2       
-    FLDD     24(a_ptr),a3       
-    FLDD     32(a_ptr),a4       
-    FLDD     40(a_ptr),a5       
-    FLDD     48(a_ptr),a6       
-    FLDD     56(a_ptr),a7       
-
-    FLDD      0(b_ptr),b0       
-    FLDD      8(b_ptr),b1       
-    FLDD     16(b_ptr),b2       
-    FLDD     24(b_ptr),b3       
-    FLDD     32(b_ptr),b4       
-    FLDD     40(b_ptr),b5       
-    FLDD     48(b_ptr),b6       
-    FLDD     56(b_ptr),b7       
-
-	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
-	STD       c1,0(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
-	STD       c2,8(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
-	STD       c3,16(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
-	STD       c1,24(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
-	STD       c2,32(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
-	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
-	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
-	STD       c3,40(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
-	STD       c1,48(r_ptr)
-	COPY      %r0,c1
-	
-	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
-	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
-	STD       c2,56(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
-	STD       c3,64(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
-	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
-	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
-	STD       c1,72(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
-	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
-	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
-	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
-	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
-	STD       c2,80(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
-	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
-	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
-	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
-	STD       c3,88(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
-	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
-	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
-	STD       c1,96(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
-	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
-	STD       c2,104(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
-	STD       c3,112(r_ptr)
-	STD       c1,120(r_ptr)
-
-    .EXIT
-    FLDD    -88(%sp),%fr13 
-    FLDD    -96(%sp),%fr12 
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-;-----------------------------------------------------------------------------
-;
-;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-; arg0 = r_ptr
-; arg1 = a_ptr
-; arg2 = b_ptr
-;
-
-bn_mul_comba4
-	.proc
-	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
-	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
-    .entry
-	.align 64
-
-    STD     %r3,0(%sp)          ; save r3
-    STD     %r4,8(%sp)          ; save r4
-    STD     %r5,16(%sp)         ; save r5
-    STD     %r6,24(%sp)         ; save r6
-    FSTD    %fr12,32(%sp)       ; save r6
-    FSTD    %fr13,40(%sp)       ; save r7
-
-	;
-	; Zero out carries
-	;
-	COPY     %r0,c1
-	COPY     %r0,c2
-	COPY     %r0,c3
-
-	LDO      128(%sp),%sp       ; bump stack
-    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
-
-	;
-	; Load up all of the values we are going to use
-	;
-    FLDD      0(a_ptr),a0       
-    FLDD      8(a_ptr),a1       
-    FLDD     16(a_ptr),a2       
-    FLDD     24(a_ptr),a3       
-
-    FLDD      0(b_ptr),b0       
-    FLDD      8(b_ptr),b1       
-    FLDD     16(b_ptr),b2       
-    FLDD     24(b_ptr),b3       
-
-	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
-	STD       c1,0(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
-	STD       c2,8(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
-	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
-	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
-	STD       c3,16(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
-	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
-	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
-	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
-	STD       c1,24(r_ptr)
-	COPY      %r0,c1
-
-	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
-	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
-	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
-	STD       c2,32(r_ptr)
-	COPY      %r0,c2
-
-	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
-	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
-	STD       c3,40(r_ptr)
-	COPY      %r0,c3
-
-	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
-	STD       c1,48(r_ptr)
-	STD       c2,56(r_ptr)
-
-    .EXIT
-    FLDD    -88(%sp),%fr13 
-    FLDD    -96(%sp),%fr12 
-    LDD     -104(%sp),%r6        ; restore r6
-    LDD     -112(%sp),%r5        ; restore r5
-    LDD     -120(%sp),%r4        ; restore r4
-    BVE     (%rp)
-    LDD,MB  -128(%sp),%r3
-
-	.PROCEND	
-
-
-	.SPACE	$TEXT$
-	.SUBSPA	$CODE$
-	.SPACE	$PRIVATE$,SORT=16
-	.IMPORT	$global$,DATA
-	.SPACE	$TEXT$
-	.SUBSPA	$CODE$
-	.SUBSPA	$LIT$,ACCESS=0x2c
-C$4
-	.ALIGN	8
-	.STRINGZ	"Division would overflow (%d)\n"
-	.END
diff --git a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
index cd9926a25f..aa9f626ed2 100644
--- a/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/parisc-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -21,7 +21,7 @@
 # optimal in respect to instruction set capabilities. Fair comparison
 # with vendor compiler is problematic, because OpenSSL doesn't define
 # BN_LLONG [presumably] for historical reasons, which drives compiler
-# toward 4 times 16x16=32-bit multiplicatons [plus complementary
+# toward 4 times 16x16=32-bit multiplications [plus complementary
 # shifts and additions] instead. This means that you should observe
 # several times improvement over code generated by vendor compiler
 # for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
@@ -864,7 +864,7 @@ L\$copy_pa11
 	comiclr,=	0,$hi1,%r0
 	copy		$ti0,$hi0
 	addib,<>	4,$idx,L\$copy_pa11
-	stws,ma		$hi0,4($rp)	
+	stws,ma		$hi0,4($rp)
 
 	nop					; alignment
 L\$done
@@ -984,6 +984,11 @@ sub assemble {
     ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
 }
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+	=~ /GNU assembler/) {
+    $gnuas = 1;
+}
+
 foreach (split("\n",$code)) {
 	s/\`([^\`]*)\`/eval $1/ge;
 	# flip word order in 64-bit mode...
@@ -991,7 +996,10 @@ foreach (split("\n",$code)) {
 	# assemble 2.0 instructions in 32-bit mode...
 	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
 
-	s/\bbv\b/bve/gm	if ($SIZE_T==8);
+	s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
+	s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
+	s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);
+	s/\bbv\b/bve/			if ($SIZE_T==8);
 
 	print $_,"\n";
 }
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
index 9d14a12156..ec7e019a43 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -26,11 +26,21 @@
 # So far RSA *sign* performance improvement over pre-bn_mul_mont asm
 # for 64-bit application running on PPC970/G5 is:
 #
-# 512-bit	+65%	
+# 512-bit	+65%
 # 1024-bit	+35%
 # 2048-bit	+18%
 # 4096-bit	+4%
 
+# September 2016
+#
+# Add multiplication procedure operating on lengths divisible by 4
+# and squaring procedure operating on lengths divisible by 8. Length
+# is expressed in number of limbs. RSA private key operations are
+# ~35-50% faster (more for longer keys) on contemporary high-end POWER
+# processors in 64-bit builds, [mysteriously enough] more in 32-bit
+# builds. On low-end 32-bit processors performance improvement turned
+# to be marginal...
+
 $flavour = shift;
 
 if ($flavour =~ /32/) {
@@ -49,7 +59,8 @@ if ($flavour =~ /32/) {
 	$UMULL=	"mullw";	# unsigned multiply low
 	$UMULH=	"mulhwu";	# unsigned multiply high
 	$UCMP=	"cmplw";	# unsigned compare
-	$SHRI=	"srwi";		# unsigned shift right by immediate	
+	$SHRI=	"srwi";		# unsigned shift right by immediate
+	$SHLI=	"slwi";		# unsigned shift left by immediate
 	$PUSH=	$ST;
 	$POP=	$LD;
 } elsif ($flavour =~ /64/) {
@@ -69,7 +80,8 @@ if ($flavour =~ /32/) {
 	$UMULL=	"mulld";	# unsigned multiply low
 	$UMULH=	"mulhdu";	# unsigned multiply high
 	$UCMP=	"cmpld";	# unsigned compare
-	$SHRI=	"srdi";		# unsigned shift right by immediate	
+	$SHRI=	"srdi";		# unsigned shift right by immediate
+	$SHLI=	"sldi";		# unsigned shift left by immediate
 	$PUSH=	$ST;
 	$POP=	$LD;
 } else { die "nonsense $flavour"; }
@@ -86,43 +98,44 @@ open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
 
 $sp="r1";
 $toc="r2";
-$rp="r3";	$ovf="r3";
+$rp="r3";
 $ap="r4";
 $bp="r5";
 $np="r6";
 $n0="r7";
 $num="r8";
-$rp="r9";	# $rp is reassigned
-$aj="r10";
-$nj="r11";
-$tj="r12";
+
+{
+my $ovf=$rp;
+my $rp="r9";	# $rp is reassigned
+my $aj="r10";
+my $nj="r11";
+my $tj="r12";
 # non-volatile registers
-$i="r20";
-$j="r21";
-$tp="r22";
-$m0="r23";
-$m1="r24";
-$lo0="r25";
-$hi0="r26";
-$lo1="r27";
-$hi1="r28";
-$alo="r29";
-$ahi="r30";
-$nlo="r31";
+my $i="r20";
+my $j="r21";
+my $tp="r22";
+my $m0="r23";
+my $m1="r24";
+my $lo0="r25";
+my $hi0="r26";
+my $lo1="r27";
+my $hi1="r28";
+my $alo="r29";
+my $ahi="r30";
+my $nlo="r31";
 #
-$nhi="r0";
+my $nhi="r0";
 
 $code=<<___;
 .machine "any"
 .text
 
 .globl	.bn_mul_mont_int
-.align	4
+.align	5
 .bn_mul_mont_int:
-	cmpwi	$num,4
 	mr	$rp,r3		; $rp is reassigned
 	li	r3,0
-	bltlr
 ___
 $code.=<<___ if ($BNSZ==4);
 	cmpwi	$num,32		; longer key performance is not better
@@ -334,7 +347,1641 @@ Lcopy:				; conditional copy
 	.byte	0,12,4,0,0x80,12,6,0
 	.long	0
 .size	.bn_mul_mont_int,.-.bn_mul_mont_int
+___
+}
+if (1) {
+my ($a0,$a1,$a2,$a3,
+    $t0,$t1,$t2,$t3,
+    $m0,$m1,$m2,$m3,
+    $acc0,$acc1,$acc2,$acc3,$acc4,
+    $bi,$mi,$tp,$ap_end,$cnt) = map("r$_",(9..12,14..31));
+my  ($carry,$zero) = ($rp,"r0");
+
+# sp----------->+-------------------------------+
+#		| saved sp			|
+#		+-------------------------------+
+#		.				.
+# +8*size_t	+-------------------------------+
+#		| 4 "n0*t0"			|
+#		.				.
+#		.				.
+# +12*size_t	+-------------------------------+
+#		| size_t tmp[num]		|
+#		.				.
+#		.				.
+#		.				.
+#		+-------------------------------+
+#		| topmost carry			|
+#		.				.
+# -18*size_t	+-------------------------------+
+#		| 18 saved gpr, r14-r31		|
+#		.				.
+#		.				.
+#		+-------------------------------+
+$code.=<<___;
+.globl	.bn_mul4x_mont_int
+.align	5
+.bn_mul4x_mont_int:
+	andi.	r0,$num,7
+	bne	.Lmul4x_do
+	$UCMP	$ap,$bp
+	bne	.Lmul4x_do
+	b	.Lsqr8x_do
+.Lmul4x_do:
+	slwi	$num,$num,`log($SIZE_T)/log(2)`
+	mr	$a0,$sp
+	li	$a1,-32*$SIZE_T
+	sub	$a1,$a1,$num
+	$STUX	$sp,$sp,$a1		# alloca
+
+	$PUSH	r14,-$SIZE_T*18($a0)
+	$PUSH	r15,-$SIZE_T*17($a0)
+	$PUSH	r16,-$SIZE_T*16($a0)
+	$PUSH	r17,-$SIZE_T*15($a0)
+	$PUSH	r18,-$SIZE_T*14($a0)
+	$PUSH	r19,-$SIZE_T*13($a0)
+	$PUSH	r20,-$SIZE_T*12($a0)
+	$PUSH	r21,-$SIZE_T*11($a0)
+	$PUSH	r22,-$SIZE_T*10($a0)
+	$PUSH	r23,-$SIZE_T*9($a0)
+	$PUSH	r24,-$SIZE_T*8($a0)
+	$PUSH	r25,-$SIZE_T*7($a0)
+	$PUSH	r26,-$SIZE_T*6($a0)
+	$PUSH	r27,-$SIZE_T*5($a0)
+	$PUSH	r28,-$SIZE_T*4($a0)
+	$PUSH	r29,-$SIZE_T*3($a0)
+	$PUSH	r30,-$SIZE_T*2($a0)
+	$PUSH	r31,-$SIZE_T*1($a0)
+
+	subi	$ap,$ap,$SIZE_T		# bias by -1
+	subi	$np,$np,$SIZE_T		# bias by -1
+	subi	$rp,$rp,$SIZE_T		# bias by -1
+	$LD	$n0,0($n0)		# *n0
+
+	add	$t0,$bp,$num
+	add	$ap_end,$ap,$num
+	subi	$t0,$t0,$SIZE_T*4	# &b[num-4]
+
+	$LD	$bi,$SIZE_T*0($bp)	# b[0]
+	li	$acc0,0
+	$LD	$a0,$SIZE_T*1($ap)	# a[0..3]
+	li	$acc1,0
+	$LD	$a1,$SIZE_T*2($ap)
+	li	$acc2,0
+	$LD	$a2,$SIZE_T*3($ap)
+	li	$acc3,0
+	$LDU	$a3,$SIZE_T*4($ap)
+	$LD	$m0,$SIZE_T*1($np)	# n[0..3]
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+
+	$PUSH	$rp,$SIZE_T*6($sp)	# offload rp and &b[num-4]
+	$PUSH	$t0,$SIZE_T*7($sp)
+	li	$carry,0
+	addic	$tp,$sp,$SIZE_T*7	# &t[-1], clear carry bit
+	li	$cnt,0
+	li	$zero,0
+	b	.Loop_mul4x_1st_reduction
+
+.align	5
+.Loop_mul4x_1st_reduction:
+	$UMULL	$t0,$a0,$bi		# lo(a[0..3]*b[0])
+	addze	$carry,$carry		# modulo-scheduled
+	$UMULL	$t1,$a1,$bi
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$bi
+	andi.	$cnt,$cnt,$SIZE_T*4-1
+	$UMULL	$t3,$a3,$bi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$a0,$bi		# hi(a[0..3]*b[0])
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$a1,$bi
+	adde	$acc2,$acc2,$t2
+	$UMULL	$mi,$acc0,$n0		# t[0]*n0
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t2,$a2,$bi
+	addze	$acc4,$zero
+	$UMULH	$t3,$a3,$bi
+	$LDX	$bi,$bp,$cnt		# next b[i] (or b[0])
+	addc	$acc1,$acc1,$t0
+	# (*)	mul	$t0,$m0,$mi	# lo(n[0..3]*t[0]*n0)
+	$STU	$mi,$SIZE_T($tp)	# put aside t[0]*n0 for tail processing
+	adde	$acc2,$acc2,$t1
+	$UMULL	$t1,$m1,$mi
+	adde	$acc3,$acc3,$t2
+	$UMULL	$t2,$m2,$mi
+	adde	$acc4,$acc4,$t3		# can't overflow
+	$UMULL	$t3,$m3,$mi
+	# (*)	addc	$acc0,$acc0,$t0
+	# (*)	As for removal of first multiplication and addition
+	#	instructions. The outcome of first addition is
+	#	guaranteed to be zero, which leaves two computationally
+	#	significant outcomes: it either carries or not. Then
+	#	question is when does it carry? Is there alternative
+	#	way to deduce it? If you follow operations, you can
+	#	observe that condition for carry is quite simple:
+	#	$acc0 being non-zero. So that carry can be calculated
+	#	by adding -1 to $acc0. That's what next instruction does.
+	addic	$acc0,$acc0,-1		# (*), discarded
+	$UMULH	$t0,$m0,$mi		# hi(n[0..3]*t[0]*n0)
+	adde	$acc0,$acc1,$t1
+	$UMULH	$t1,$m1,$mi
+	adde	$acc1,$acc2,$t2
+	$UMULH	$t2,$m2,$mi
+	adde	$acc2,$acc3,$t3
+	$UMULH	$t3,$m3,$mi
+	adde	$acc3,$acc4,$carry
+	addze	$carry,$zero
+	addc	$acc0,$acc0,$t0
+	adde	$acc1,$acc1,$t1
+	adde	$acc2,$acc2,$t2
+	adde	$acc3,$acc3,$t3
+	#addze	$carry,$carry
+	bne	.Loop_mul4x_1st_reduction
+
+	$UCMP	$ap_end,$ap
+	beq	.Lmul4x4_post_condition
+
+	$LD	$a0,$SIZE_T*1($ap)	# a[4..7]
+	$LD	$a1,$SIZE_T*2($ap)
+	$LD	$a2,$SIZE_T*3($ap)
+	$LDU	$a3,$SIZE_T*4($ap)
+	$LD	$mi,$SIZE_T*8($sp)	# a[0]*n0
+	$LD	$m0,$SIZE_T*1($np)	# n[4..7]
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+	b	.Loop_mul4x_1st_tail
+
+.align	5
+.Loop_mul4x_1st_tail:
+	$UMULL	$t0,$a0,$bi		# lo(a[4..7]*b[i])
+	addze	$carry,$carry		# modulo-scheduled
+	$UMULL	$t1,$a1,$bi
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$bi
+	andi.	$cnt,$cnt,$SIZE_T*4-1
+	$UMULL	$t3,$a3,$bi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$a0,$bi		# hi(a[4..7]*b[i])
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$a1,$bi
+	adde	$acc2,$acc2,$t2
+	$UMULH	$t2,$a2,$bi
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t3,$a3,$bi
+	addze	$acc4,$zero
+	$LDX	$bi,$bp,$cnt		# next b[i] (or b[0])
+	addc	$acc1,$acc1,$t0
+	$UMULL	$t0,$m0,$mi		# lo(n[4..7]*a[0]*n0)
+	adde	$acc2,$acc2,$t1
+	$UMULL	$t1,$m1,$mi
+	adde	$acc3,$acc3,$t2
+	$UMULL	$t2,$m2,$mi
+	adde	$acc4,$acc4,$t3		# can't overflow
+	$UMULL	$t3,$m3,$mi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$m0,$mi		# hi(n[4..7]*a[0]*n0)
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$m1,$mi
+	adde	$acc2,$acc2,$t2
+	$UMULH	$t2,$m2,$mi
+	adde	$acc3,$acc3,$t3
+	adde	$acc4,$acc4,$carry
+	$UMULH	$t3,$m3,$mi
+	addze	$carry,$zero
+	addi	$mi,$sp,$SIZE_T*8
+	$LDX	$mi,$mi,$cnt		# next t[0]*n0
+	$STU	$acc0,$SIZE_T($tp)	# word of result
+	addc	$acc0,$acc1,$t0
+	adde	$acc1,$acc2,$t1
+	adde	$acc2,$acc3,$t2
+	adde	$acc3,$acc4,$t3
+	#addze	$carry,$carry
+	bne	.Loop_mul4x_1st_tail
+
+	sub	$t1,$ap_end,$num	# rewinded $ap
+	$UCMP	$ap_end,$ap		# done yet?
+	beq	.Lmul4x_proceed
+
+	$LD	$a0,$SIZE_T*1($ap)
+	$LD	$a1,$SIZE_T*2($ap)
+	$LD	$a2,$SIZE_T*3($ap)
+	$LDU	$a3,$SIZE_T*4($ap)
+	$LD	$m0,$SIZE_T*1($np)
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+	b	.Loop_mul4x_1st_tail
+
+.align	5
+.Lmul4x_proceed:
+	$LDU	$bi,$SIZE_T*4($bp)	# *++b
+	addze	$carry,$carry		# topmost carry
+	$LD	$a0,$SIZE_T*1($t1)
+	$LD	$a1,$SIZE_T*2($t1)
+	$LD	$a2,$SIZE_T*3($t1)
+	$LD	$a3,$SIZE_T*4($t1)
+	addi	$ap,$t1,$SIZE_T*4
+	sub	$np,$np,$num		# rewind np
+
+	$ST	$acc0,$SIZE_T*1($tp)	# result
+	$ST	$acc1,$SIZE_T*2($tp)
+	$ST	$acc2,$SIZE_T*3($tp)
+	$ST	$acc3,$SIZE_T*4($tp)
+	$ST	$carry,$SIZE_T*5($tp)	# save topmost carry
+	$LD	$acc0,$SIZE_T*12($sp)	# t[0..3]
+	$LD	$acc1,$SIZE_T*13($sp)
+	$LD	$acc2,$SIZE_T*14($sp)
+	$LD	$acc3,$SIZE_T*15($sp)
+
+	$LD	$m0,$SIZE_T*1($np)	# n[0..3]
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+	addic	$tp,$sp,$SIZE_T*7	# &t[-1], clear carry bit
+	li	$carry,0
+	b	.Loop_mul4x_reduction
+
+.align	5
+.Loop_mul4x_reduction:
+	$UMULL	$t0,$a0,$bi		# lo(a[0..3]*b[4])
+	addze	$carry,$carry		# modulo-scheduled
+	$UMULL	$t1,$a1,$bi
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$bi
+	andi.	$cnt,$cnt,$SIZE_T*4-1
+	$UMULL	$t3,$a3,$bi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$a0,$bi		# hi(a[0..3]*b[4])
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$a1,$bi
+	adde	$acc2,$acc2,$t2
+	$UMULL	$mi,$acc0,$n0		# t[0]*n0
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t2,$a2,$bi
+	addze	$acc4,$zero
+	$UMULH	$t3,$a3,$bi
+	$LDX	$bi,$bp,$cnt		# next b[i]
+	addc	$acc1,$acc1,$t0
+	# (*)	mul	$t0,$m0,$mi
+	$STU	$mi,$SIZE_T($tp)	# put aside t[0]*n0 for tail processing
+	adde	$acc2,$acc2,$t1
+	$UMULL	$t1,$m1,$mi		# lo(n[0..3]*t[0]*n0
+	adde	$acc3,$acc3,$t2
+	$UMULL	$t2,$m2,$mi
+	adde	$acc4,$acc4,$t3		# can't overflow
+	$UMULL	$t3,$m3,$mi
+	# (*)	addc	$acc0,$acc0,$t0
+	addic	$acc0,$acc0,-1		# (*), discarded
+	$UMULH	$t0,$m0,$mi		# hi(n[0..3]*t[0]*n0
+	adde	$acc0,$acc1,$t1
+	$UMULH	$t1,$m1,$mi
+	adde	$acc1,$acc2,$t2
+	$UMULH	$t2,$m2,$mi
+	adde	$acc2,$acc3,$t3
+	$UMULH	$t3,$m3,$mi
+	adde	$acc3,$acc4,$carry
+	addze	$carry,$zero
+	addc	$acc0,$acc0,$t0
+	adde	$acc1,$acc1,$t1
+	adde	$acc2,$acc2,$t2
+	adde	$acc3,$acc3,$t3
+	#addze	$carry,$carry
+	bne	.Loop_mul4x_reduction
+
+	$LD	$t0,$SIZE_T*5($tp)	# t[4..7]
+	addze	$carry,$carry
+	$LD	$t1,$SIZE_T*6($tp)
+	$LD	$t2,$SIZE_T*7($tp)
+	$LD	$t3,$SIZE_T*8($tp)
+	$LD	$a0,$SIZE_T*1($ap)	# a[4..7]
+	$LD	$a1,$SIZE_T*2($ap)
+	$LD	$a2,$SIZE_T*3($ap)
+	$LDU	$a3,$SIZE_T*4($ap)
+	addc	$acc0,$acc0,$t0
+	adde	$acc1,$acc1,$t1
+	adde	$acc2,$acc2,$t2
+	adde	$acc3,$acc3,$t3
+	#addze	$carry,$carry
+
+	$LD	$mi,$SIZE_T*8($sp)	# t[0]*n0
+	$LD	$m0,$SIZE_T*1($np)	# n[4..7]
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+	b	.Loop_mul4x_tail
+
+.align	5
+.Loop_mul4x_tail:
+	$UMULL	$t0,$a0,$bi		# lo(a[4..7]*b[4])
+	addze	$carry,$carry		# modulo-scheduled
+	$UMULL	$t1,$a1,$bi
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$bi
+	andi.	$cnt,$cnt,$SIZE_T*4-1
+	$UMULL	$t3,$a3,$bi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$a0,$bi		# hi(a[4..7]*b[4])
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$a1,$bi
+	adde	$acc2,$acc2,$t2
+	$UMULH	$t2,$a2,$bi
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t3,$a3,$bi
+	addze	$acc4,$zero
+	$LDX	$bi,$bp,$cnt		# next b[i]
+	addc	$acc1,$acc1,$t0
+	$UMULL	$t0,$m0,$mi		# lo(n[4..7]*t[0]*n0)
+	adde	$acc2,$acc2,$t1
+	$UMULL	$t1,$m1,$mi
+	adde	$acc3,$acc3,$t2
+	$UMULL	$t2,$m2,$mi
+	adde	$acc4,$acc4,$t3		# can't overflow
+	$UMULL	$t3,$m3,$mi
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$m0,$mi		# hi(n[4..7]*t[0]*n0)
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$m1,$mi
+	adde	$acc2,$acc2,$t2
+	$UMULH	$t2,$m2,$mi
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t3,$m3,$mi
+	adde	$acc4,$acc4,$carry
+	addi	$mi,$sp,$SIZE_T*8
+	$LDX	$mi,$mi,$cnt		# next a[0]*n0
+	addze	$carry,$zero
+	$STU	$acc0,$SIZE_T($tp)	# word of result
+	addc	$acc0,$acc1,$t0
+	adde	$acc1,$acc2,$t1
+	adde	$acc2,$acc3,$t2
+	adde	$acc3,$acc4,$t3
+	#addze	$carry,$carry
+	bne	.Loop_mul4x_tail
+
+	$LD	$t0,$SIZE_T*5($tp)	# next t[i] or topmost carry
+	sub	$t1,$np,$num		# rewinded np?
+	addze	$carry,$carry
+	$UCMP	$ap_end,$ap		# done yet?
+	beq	.Loop_mul4x_break
+
+	$LD	$t1,$SIZE_T*6($tp)
+	$LD	$t2,$SIZE_T*7($tp)
+	$LD	$t3,$SIZE_T*8($tp)
+	$LD	$a0,$SIZE_T*1($ap)
+	$LD	$a1,$SIZE_T*2($ap)
+	$LD	$a2,$SIZE_T*3($ap)
+	$LDU	$a3,$SIZE_T*4($ap)
+	addc	$acc0,$acc0,$t0
+	adde	$acc1,$acc1,$t1
+	adde	$acc2,$acc2,$t2
+	adde	$acc3,$acc3,$t3
+	#addze	$carry,$carry
+
+	$LD	$m0,$SIZE_T*1($np)	# n[4..7]
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$m2,$SIZE_T*3($np)
+	$LDU	$m3,$SIZE_T*4($np)
+	b	.Loop_mul4x_tail
+
+.align	5
+.Loop_mul4x_break:
+	$POP	$t2,$SIZE_T*6($sp)	# pull rp and &b[num-4]
+	$POP	$t3,$SIZE_T*7($sp)
+	addc	$a0,$acc0,$t0		# accumulate topmost carry
+	$LD	$acc0,$SIZE_T*12($sp)	# t[0..3]
+	addze	$a1,$acc1
+	$LD	$acc1,$SIZE_T*13($sp)
+	addze	$a2,$acc2
+	$LD	$acc2,$SIZE_T*14($sp)
+	addze	$a3,$acc3
+	$LD	$acc3,$SIZE_T*15($sp)
+	addze	$carry,$carry		# topmost carry
+	$ST	$a0,$SIZE_T*1($tp)	# result
+	sub	$ap,$ap_end,$num	# rewind ap
+	$ST	$a1,$SIZE_T*2($tp)
+	$ST	$a2,$SIZE_T*3($tp)
+	$ST	$a3,$SIZE_T*4($tp)
+	$ST	$carry,$SIZE_T*5($tp)	# store topmost carry
+
+	$LD	$m0,$SIZE_T*1($t1)	# n[0..3]
+	$LD	$m1,$SIZE_T*2($t1)
+	$LD	$m2,$SIZE_T*3($t1)
+	$LD	$m3,$SIZE_T*4($t1)
+	addi	$np,$t1,$SIZE_T*4
+	$UCMP	$bp,$t3			# done yet?
+	beq	.Lmul4x_post
+
+	$LDU	$bi,$SIZE_T*4($bp)
+	$LD	$a0,$SIZE_T*1($ap)	# a[0..3]
+	$LD	$a1,$SIZE_T*2($ap)
+	$LD	$a2,$SIZE_T*3($ap)
+	$LDU	$a3,$SIZE_T*4($ap)
+	li	$carry,0
+	addic	$tp,$sp,$SIZE_T*7	# &t[-1], clear carry bit
+	b	.Loop_mul4x_reduction
+
+.align	5
+.Lmul4x_post:
+	# Final step. We see if result is larger than modulus, and
+	# if it is, subtract the modulus. But comparison implies
+	# subtraction. So we subtract modulus, see if it borrowed,
+	# and conditionally copy original value.
+	srwi	$cnt,$num,`log($SIZE_T)/log(2)+2`
+	mr	$bp,$t2			# &rp[-1]
+	subi	$cnt,$cnt,1
+	mr	$ap_end,$t2		# &rp[-1] copy
+	subfc	$t0,$m0,$acc0
+	addi	$tp,$sp,$SIZE_T*15
+	subfe	$t1,$m1,$acc1
+
+	mtctr	$cnt
+.Lmul4x_sub:
+	$LD	$m0,$SIZE_T*1($np)
+	$LD	$acc0,$SIZE_T*1($tp)
+	subfe	$t2,$m2,$acc2
+	$LD	$m1,$SIZE_T*2($np)
+	$LD	$acc1,$SIZE_T*2($tp)
+	subfe	$t3,$m3,$acc3
+	$LD	$m2,$SIZE_T*3($np)
+	$LD	$acc2,$SIZE_T*3($tp)
+	$LDU	$m3,$SIZE_T*4($np)
+	$LDU	$acc3,$SIZE_T*4($tp)
+	$ST	$t0,$SIZE_T*1($bp)
+	$ST	$t1,$SIZE_T*2($bp)
+	subfe	$t0,$m0,$acc0
+	$ST	$t2,$SIZE_T*3($bp)
+	$STU	$t3,$SIZE_T*4($bp)
+	subfe	$t1,$m1,$acc1
+	bdnz	.Lmul4x_sub
+
+	 $LD	$a0,$SIZE_T*1($ap_end)
+	$ST	$t0,$SIZE_T*1($bp)
+	 $LD	$t0,$SIZE_T*12($sp)
+	subfe	$t2,$m2,$acc2
+	 $LD	$a1,$SIZE_T*2($ap_end)
+	$ST	$t1,$SIZE_T*2($bp)
+	 $LD	$t1,$SIZE_T*13($sp)
+	subfe	$t3,$m3,$acc3
+	subfe	$carry,$zero,$carry	# did it borrow?
+	 addi	$tp,$sp,$SIZE_T*12
+	 $LD	$a2,$SIZE_T*3($ap_end)
+	$ST	$t2,$SIZE_T*3($bp)
+	 $LD	$t2,$SIZE_T*14($sp)
+	 $LD	$a3,$SIZE_T*4($ap_end)
+	$ST	$t3,$SIZE_T*4($bp)
+	 $LD	$t3,$SIZE_T*15($sp)
+
+	mtctr	$cnt
+.Lmul4x_cond_copy:
+	and	$t0,$t0,$carry
+	andc	$a0,$a0,$carry
+	$ST	$zero,$SIZE_T*0($tp)	# wipe stack clean
+	and	$t1,$t1,$carry
+	andc	$a1,$a1,$carry
+	$ST	$zero,$SIZE_T*1($tp)
+	and	$t2,$t2,$carry
+	andc	$a2,$a2,$carry
+	$ST	$zero,$SIZE_T*2($tp)
+	and	$t3,$t3,$carry
+	andc	$a3,$a3,$carry
+	$ST	$zero,$SIZE_T*3($tp)
+	or	$acc0,$t0,$a0
+	$LD	$a0,$SIZE_T*5($ap_end)
+	$LD	$t0,$SIZE_T*4($tp)
+	or	$acc1,$t1,$a1
+	$LD	$a1,$SIZE_T*6($ap_end)
+	$LD	$t1,$SIZE_T*5($tp)
+	or	$acc2,$t2,$a2
+	$LD	$a2,$SIZE_T*7($ap_end)
+	$LD	$t2,$SIZE_T*6($tp)
+	or	$acc3,$t3,$a3
+	$LD	$a3,$SIZE_T*8($ap_end)
+	$LD	$t3,$SIZE_T*7($tp)
+	addi	$tp,$tp,$SIZE_T*4
+	$ST	$acc0,$SIZE_T*1($ap_end)
+	$ST	$acc1,$SIZE_T*2($ap_end)
+	$ST	$acc2,$SIZE_T*3($ap_end)
+	$STU	$acc3,$SIZE_T*4($ap_end)
+	bdnz	.Lmul4x_cond_copy
+
+	$POP	$bp,0($sp)		# pull saved sp
+	and	$t0,$t0,$carry
+	andc	$a0,$a0,$carry
+	$ST	$zero,$SIZE_T*0($tp)
+	and	$t1,$t1,$carry
+	andc	$a1,$a1,$carry
+	$ST	$zero,$SIZE_T*1($tp)
+	and	$t2,$t2,$carry
+	andc	$a2,$a2,$carry
+	$ST	$zero,$SIZE_T*2($tp)
+	and	$t3,$t3,$carry
+	andc	$a3,$a3,$carry
+	$ST	$zero,$SIZE_T*3($tp)
+	or	$acc0,$t0,$a0
+	or	$acc1,$t1,$a1
+	$ST	$zero,$SIZE_T*4($tp)
+	or	$acc2,$t2,$a2
+	or	$acc3,$t3,$a3
+	$ST	$acc0,$SIZE_T*1($ap_end)
+	$ST	$acc1,$SIZE_T*2($ap_end)
+	$ST	$acc2,$SIZE_T*3($ap_end)
+	$ST	$acc3,$SIZE_T*4($ap_end)
+
+	b	.Lmul4x_done
+
+.align	4
+.Lmul4x4_post_condition:
+	$POP	$ap,$SIZE_T*6($sp)	# pull &rp[-1]
+	$POP	$bp,0($sp)		# pull saved sp
+	addze	$carry,$carry		# modulo-scheduled
+	# $acc0-3,$carry hold result, $m0-3 hold modulus
+	subfc	$a0,$m0,$acc0
+	subfe	$a1,$m1,$acc1
+	subfe	$a2,$m2,$acc2
+	subfe	$a3,$m3,$acc3
+	subfe	$carry,$zero,$carry	# did it borrow?
+
+	and	$m0,$m0,$carry
+	and	$m1,$m1,$carry
+	addc	$a0,$a0,$m0
+	and	$m2,$m2,$carry
+	adde	$a1,$a1,$m1
+	and	$m3,$m3,$carry
+	adde	$a2,$a2,$m2
+	adde	$a3,$a3,$m3
+
+	$ST	$a0,$SIZE_T*1($ap)	# write result
+	$ST	$a1,$SIZE_T*2($ap)
+	$ST	$a2,$SIZE_T*3($ap)
+	$ST	$a3,$SIZE_T*4($ap)
+
+.Lmul4x_done:
+	$ST	$zero,$SIZE_T*8($sp)	# wipe stack clean
+	$ST	$zero,$SIZE_T*9($sp)
+	$ST	$zero,$SIZE_T*10($sp)
+	$ST	$zero,$SIZE_T*11($sp)
+	li	r3,1			# signal "done"
+	$POP	r14,-$SIZE_T*18($bp)
+	$POP	r15,-$SIZE_T*17($bp)
+	$POP	r16,-$SIZE_T*16($bp)
+	$POP	r17,-$SIZE_T*15($bp)
+	$POP	r18,-$SIZE_T*14($bp)
+	$POP	r19,-$SIZE_T*13($bp)
+	$POP	r20,-$SIZE_T*12($bp)
+	$POP	r21,-$SIZE_T*11($bp)
+	$POP	r22,-$SIZE_T*10($bp)
+	$POP	r23,-$SIZE_T*9($bp)
+	$POP	r24,-$SIZE_T*8($bp)
+	$POP	r25,-$SIZE_T*7($bp)
+	$POP	r26,-$SIZE_T*6($bp)
+	$POP	r27,-$SIZE_T*5($bp)
+	$POP	r28,-$SIZE_T*4($bp)
+	$POP	r29,-$SIZE_T*3($bp)
+	$POP	r30,-$SIZE_T*2($bp)
+	$POP	r31,-$SIZE_T*1($bp)
+	mr	$sp,$bp
+	blr
+	.long	0
+	.byte	0,12,4,0x20,0x80,18,6,0
+	.long	0
+.size	.bn_mul4x_mont_int,.-.bn_mul4x_mont_int
+___
+}
+
+if (1) {
+########################################################################
+# Following is PPC adaptation of sqrx8x_mont from x86_64-mont5 module.
+
+my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("r$_",(9..12,14..17));
+my ($t0,$t1,$t2,$t3)=map("r$_",(18..21));
+my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("r$_",(22..29));
+my ($cnt,$carry,$zero)=("r30","r31","r0");
+my ($tp,$ap_end,$na0)=($bp,$np,$carry);
+
+# sp----------->+-------------------------------+
+#		| saved sp			|
+#		+-------------------------------+
+#		.				.
+# +12*size_t	+-------------------------------+
+#		| size_t tmp[2*num]		|
+#		.				.
+#		.				.
+#		.				.
+#		+-------------------------------+
+#		.				.
+# -18*size_t	+-------------------------------+
+#		| 18 saved gpr, r14-r31		|
+#		.				.
+#		.				.
+#		+-------------------------------+
+$code.=<<___;
+.align	5
+__bn_sqr8x_mont:
+.Lsqr8x_do:
+	mr	$a0,$sp
+	slwi	$a1,$num,`log($SIZE_T)/log(2)+1`
+	li	$a2,-32*$SIZE_T
+	sub	$a1,$a2,$a1
+	slwi	$num,$num,`log($SIZE_T)/log(2)`
+	$STUX	$sp,$sp,$a1		# alloca
+
+	$PUSH	r14,-$SIZE_T*18($a0)
+	$PUSH	r15,-$SIZE_T*17($a0)
+	$PUSH	r16,-$SIZE_T*16($a0)
+	$PUSH	r17,-$SIZE_T*15($a0)
+	$PUSH	r18,-$SIZE_T*14($a0)
+	$PUSH	r19,-$SIZE_T*13($a0)
+	$PUSH	r20,-$SIZE_T*12($a0)
+	$PUSH	r21,-$SIZE_T*11($a0)
+	$PUSH	r22,-$SIZE_T*10($a0)
+	$PUSH	r23,-$SIZE_T*9($a0)
+	$PUSH	r24,-$SIZE_T*8($a0)
+	$PUSH	r25,-$SIZE_T*7($a0)
+	$PUSH	r26,-$SIZE_T*6($a0)
+	$PUSH	r27,-$SIZE_T*5($a0)
+	$PUSH	r28,-$SIZE_T*4($a0)
+	$PUSH	r29,-$SIZE_T*3($a0)
+	$PUSH	r30,-$SIZE_T*2($a0)
+	$PUSH	r31,-$SIZE_T*1($a0)
 
+	subi	$ap,$ap,$SIZE_T		# bias by -1
+	subi	$t0,$np,$SIZE_T		# bias by -1
+	subi	$rp,$rp,$SIZE_T		# bias by -1
+	$LD	$n0,0($n0)		# *n0
+	li	$zero,0
+
+	add	$ap_end,$ap,$num
+	$LD	$a0,$SIZE_T*1($ap)
+	#li	$acc0,0
+	$LD	$a1,$SIZE_T*2($ap)
+	li	$acc1,0
+	$LD	$a2,$SIZE_T*3($ap)
+	li	$acc2,0
+	$LD	$a3,$SIZE_T*4($ap)
+	li	$acc3,0
+	$LD	$a4,$SIZE_T*5($ap)
+	li	$acc4,0
+	$LD	$a5,$SIZE_T*6($ap)
+	li	$acc5,0
+	$LD	$a6,$SIZE_T*7($ap)
+	li	$acc6,0
+	$LDU	$a7,$SIZE_T*8($ap)
+	li	$acc7,0
+
+	addi	$tp,$sp,$SIZE_T*11	# &tp[-1]
+	subic.	$cnt,$num,$SIZE_T*8
+	b	.Lsqr8x_zero_start
+
+.align	5
+.Lsqr8x_zero:
+	subic.	$cnt,$cnt,$SIZE_T*8
+	$ST	$zero,$SIZE_T*1($tp)
+	$ST	$zero,$SIZE_T*2($tp)
+	$ST	$zero,$SIZE_T*3($tp)
+	$ST	$zero,$SIZE_T*4($tp)
+	$ST	$zero,$SIZE_T*5($tp)
+	$ST	$zero,$SIZE_T*6($tp)
+	$ST	$zero,$SIZE_T*7($tp)
+	$ST	$zero,$SIZE_T*8($tp)
+.Lsqr8x_zero_start:
+	$ST	$zero,$SIZE_T*9($tp)
+	$ST	$zero,$SIZE_T*10($tp)
+	$ST	$zero,$SIZE_T*11($tp)
+	$ST	$zero,$SIZE_T*12($tp)
+	$ST	$zero,$SIZE_T*13($tp)
+	$ST	$zero,$SIZE_T*14($tp)
+	$ST	$zero,$SIZE_T*15($tp)
+	$STU	$zero,$SIZE_T*16($tp)
+	bne	.Lsqr8x_zero
+
+	$PUSH	$rp,$SIZE_T*6($sp)	# offload &rp[-1]
+	$PUSH	$t0,$SIZE_T*7($sp)	# offload &np[-1]
+	$PUSH	$n0,$SIZE_T*8($sp)	# offload n0
+	$PUSH	$tp,$SIZE_T*9($sp)	# &tp[2*num-1]
+	$PUSH	$zero,$SIZE_T*10($sp)	# initial top-most carry
+	addi	$tp,$sp,$SIZE_T*11	# &tp[-1]
+
+	# Multiply everything but a[i]*a[i]
+.align	5
+.Lsqr8x_outer_loop:
+	#						  a[1]a[0]     (i)
+	#					      a[2]a[0]
+	#					  a[3]a[0]
+	#				      a[4]a[0]
+	#				  a[5]a[0]
+	#			      a[6]a[0]
+	#			  a[7]a[0]
+	#					  a[2]a[1]	       (ii)
+	#				      a[3]a[1]
+	#				  a[4]a[1]
+	#			      a[5]a[1]
+	#			  a[6]a[1]
+	#		      a[7]a[1]
+	#				  a[3]a[2]		       (iii)
+	#			      a[4]a[2]
+	#			  a[5]a[2]
+	#		      a[6]a[2]
+	#		  a[7]a[2]
+	#			  a[4]a[3]			       (iv)
+	#		      a[5]a[3]
+	#		  a[6]a[3]
+	#	      a[7]a[3]
+	#		  a[5]a[4]				       (v)
+	#	      a[6]a[4]
+	#	  a[7]a[4]
+	#	  a[6]a[5]					       (vi)
+	#     a[7]a[5]
+	# a[7]a[6]						       (vii)
+
+	$UMULL	$t0,$a1,$a0		# lo(a[1..7]*a[0])		(i)
+	$UMULL	$t1,$a2,$a0
+	$UMULL	$t2,$a3,$a0
+	$UMULL	$t3,$a4,$a0
+	addc	$acc1,$acc1,$t0		# t[1]+lo(a[1]*a[0])
+	$UMULL	$t0,$a5,$a0
+	adde	$acc2,$acc2,$t1
+	$UMULL	$t1,$a6,$a0
+	adde	$acc3,$acc3,$t2
+	$UMULL	$t2,$a7,$a0
+	adde	$acc4,$acc4,$t3
+	$UMULH	$t3,$a1,$a0		# hi(a[1..7]*a[0])
+	adde	$acc5,$acc5,$t0
+	$UMULH	$t0,$a2,$a0
+	adde	$acc6,$acc6,$t1
+	$UMULH	$t1,$a3,$a0
+	adde	$acc7,$acc7,$t2
+	$UMULH	$t2,$a4,$a0
+	$ST	$acc0,$SIZE_T*1($tp)	# t[0]
+	addze	$acc0,$zero		# t[8]
+	$ST	$acc1,$SIZE_T*2($tp)	# t[1]
+	addc	$acc2,$acc2,$t3		# t[2]+lo(a[1]*a[0])
+	$UMULH	$t3,$a5,$a0
+	adde	$acc3,$acc3,$t0
+	$UMULH	$t0,$a6,$a0
+	adde	$acc4,$acc4,$t1
+	$UMULH	$t1,$a7,$a0
+	adde	$acc5,$acc5,$t2
+	 $UMULL	$t2,$a2,$a1		# lo(a[2..7]*a[1])		(ii)
+	adde	$acc6,$acc6,$t3
+	 $UMULL	$t3,$a3,$a1
+	adde	$acc7,$acc7,$t0
+	 $UMULL	$t0,$a4,$a1
+	adde	$acc0,$acc0,$t1
+
+	$UMULL	$t1,$a5,$a1
+	addc	$acc3,$acc3,$t2
+	$UMULL	$t2,$a6,$a1
+	adde	$acc4,$acc4,$t3
+	$UMULL	$t3,$a7,$a1
+	adde	$acc5,$acc5,$t0
+	$UMULH	$t0,$a2,$a1		# hi(a[2..7]*a[1])
+	adde	$acc6,$acc6,$t1
+	$UMULH	$t1,$a3,$a1
+	adde	$acc7,$acc7,$t2
+	$UMULH	$t2,$a4,$a1
+	adde	$acc0,$acc0,$t3
+	$UMULH	$t3,$a5,$a1
+	$ST	$acc2,$SIZE_T*3($tp)	# t[2]
+	addze	$acc1,$zero		# t[9]
+	$ST	$acc3,$SIZE_T*4($tp)	# t[3]
+	addc	$acc4,$acc4,$t0
+	$UMULH	$t0,$a6,$a1
+	adde	$acc5,$acc5,$t1
+	$UMULH	$t1,$a7,$a1
+	adde	$acc6,$acc6,$t2
+	 $UMULL	$t2,$a3,$a2		# lo(a[3..7]*a[2])		(iii)
+	adde	$acc7,$acc7,$t3
+	 $UMULL	$t3,$a4,$a2
+	adde	$acc0,$acc0,$t0
+	 $UMULL	$t0,$a5,$a2
+	adde	$acc1,$acc1,$t1
+
+	$UMULL	$t1,$a6,$a2
+	addc	$acc5,$acc5,$t2
+	$UMULL	$t2,$a7,$a2
+	adde	$acc6,$acc6,$t3
+	$UMULH	$t3,$a3,$a2		# hi(a[3..7]*a[2])
+	adde	$acc7,$acc7,$t0
+	$UMULH	$t0,$a4,$a2
+	adde	$acc0,$acc0,$t1
+	$UMULH	$t1,$a5,$a2
+	adde	$acc1,$acc1,$t2
+	$UMULH	$t2,$a6,$a2
+	$ST	$acc4,$SIZE_T*5($tp)	# t[4]
+	addze	$acc2,$zero		# t[10]
+	$ST	$acc5,$SIZE_T*6($tp)	# t[5]
+	addc	$acc6,$acc6,$t3
+	$UMULH	$t3,$a7,$a2
+	adde	$acc7,$acc7,$t0
+	 $UMULL	$t0,$a4,$a3		# lo(a[4..7]*a[3])		(iv)
+	adde	$acc0,$acc0,$t1
+	 $UMULL	$t1,$a5,$a3
+	adde	$acc1,$acc1,$t2
+	 $UMULL	$t2,$a6,$a3
+	adde	$acc2,$acc2,$t3
+
+	$UMULL	$t3,$a7,$a3
+	addc	$acc7,$acc7,$t0
+	$UMULH	$t0,$a4,$a3		# hi(a[4..7]*a[3])
+	adde	$acc0,$acc0,$t1
+	$UMULH	$t1,$a5,$a3
+	adde	$acc1,$acc1,$t2
+	$UMULH	$t2,$a6,$a3
+	adde	$acc2,$acc2,$t3
+	$UMULH	$t3,$a7,$a3
+	$ST	$acc6,$SIZE_T*7($tp)	# t[6]
+	addze	$acc3,$zero		# t[11]
+	$STU	$acc7,$SIZE_T*8($tp)	# t[7]
+	addc	$acc0,$acc0,$t0
+	 $UMULL	$t0,$a5,$a4		# lo(a[5..7]*a[4])		(v)
+	adde	$acc1,$acc1,$t1
+	 $UMULL	$t1,$a6,$a4
+	adde	$acc2,$acc2,$t2
+	 $UMULL	$t2,$a7,$a4
+	adde	$acc3,$acc3,$t3
+
+	$UMULH	$t3,$a5,$a4		# hi(a[5..7]*a[4])
+	addc	$acc1,$acc1,$t0
+	$UMULH	$t0,$a6,$a4
+	adde	$acc2,$acc2,$t1
+	$UMULH	$t1,$a7,$a4
+	adde	$acc3,$acc3,$t2
+	 $UMULL	$t2,$a6,$a5		# lo(a[6..7]*a[5])		(vi)
+	addze	$acc4,$zero		# t[12]
+	addc	$acc2,$acc2,$t3
+	 $UMULL	$t3,$a7,$a5
+	adde	$acc3,$acc3,$t0
+	 $UMULH	$t0,$a6,$a5		# hi(a[6..7]*a[5])
+	adde	$acc4,$acc4,$t1
+
+	$UMULH	$t1,$a7,$a5
+	addc	$acc3,$acc3,$t2
+	 $UMULL	$t2,$a7,$a6		# lo(a[7]*a[6])			(vii)
+	adde	$acc4,$acc4,$t3
+	 $UMULH	$t3,$a7,$a6		# hi(a[7]*a[6])
+	addze	$acc5,$zero		# t[13]
+	addc	$acc4,$acc4,$t0
+	$UCMP	$ap_end,$ap		# done yet?
+	adde	$acc5,$acc5,$t1
+
+	addc	$acc5,$acc5,$t2
+	sub	$t0,$ap_end,$num	# rewinded ap
+	addze	$acc6,$zero		# t[14]
+	add	$acc6,$acc6,$t3
+
+	beq	.Lsqr8x_outer_break
+
+	mr	$n0,$a0
+	$LD	$a0,$SIZE_T*1($tp)
+	$LD	$a1,$SIZE_T*2($tp)
+	$LD	$a2,$SIZE_T*3($tp)
+	$LD	$a3,$SIZE_T*4($tp)
+	$LD	$a4,$SIZE_T*5($tp)
+	$LD	$a5,$SIZE_T*6($tp)
+	$LD	$a6,$SIZE_T*7($tp)
+	$LD	$a7,$SIZE_T*8($tp)
+	addc	$acc0,$acc0,$a0
+	$LD	$a0,$SIZE_T*1($ap)
+	adde	$acc1,$acc1,$a1
+	$LD	$a1,$SIZE_T*2($ap)
+	adde	$acc2,$acc2,$a2
+	$LD	$a2,$SIZE_T*3($ap)
+	adde	$acc3,$acc3,$a3
+	$LD	$a3,$SIZE_T*4($ap)
+	adde	$acc4,$acc4,$a4
+	$LD	$a4,$SIZE_T*5($ap)
+	adde	$acc5,$acc5,$a5
+	$LD	$a5,$SIZE_T*6($ap)
+	adde	$acc6,$acc6,$a6
+	$LD	$a6,$SIZE_T*7($ap)
+	subi	$rp,$ap,$SIZE_T*7
+	addze	$acc7,$a7
+	$LDU	$a7,$SIZE_T*8($ap)
+	#addze	$carry,$zero		# moved below
+	li	$cnt,0
+	b	.Lsqr8x_mul
+
+	#                                                          a[8]a[0]
+	#                                                      a[9]a[0]
+	#                                                  a[a]a[0]
+	#                                              a[b]a[0]
+	#                                          a[c]a[0]
+	#                                      a[d]a[0]
+	#                                  a[e]a[0]
+	#                              a[f]a[0]
+	#                                                      a[8]a[1]
+	#                          a[f]a[1]........................
+	#                                                  a[8]a[2]
+	#                      a[f]a[2]........................
+	#                                              a[8]a[3]
+	#                  a[f]a[3]........................
+	#                                          a[8]a[4]
+	#              a[f]a[4]........................
+	#                                      a[8]a[5]
+	#          a[f]a[5]........................
+	#                                  a[8]a[6]
+	#      a[f]a[6]........................
+	#                              a[8]a[7]
+	#  a[f]a[7]........................
+.align	5
+.Lsqr8x_mul:
+	$UMULL	$t0,$a0,$n0
+	addze	$carry,$zero		# carry bit, modulo-scheduled
+	$UMULL	$t1,$a1,$n0
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$n0
+	andi.	$cnt,$cnt,$SIZE_T*8-1
+	$UMULL	$t3,$a3,$n0
+	addc	$acc0,$acc0,$t0
+	$UMULL	$t0,$a4,$n0
+	adde	$acc1,$acc1,$t1
+	$UMULL	$t1,$a5,$n0
+	adde	$acc2,$acc2,$t2
+	$UMULL	$t2,$a6,$n0
+	adde	$acc3,$acc3,$t3
+	$UMULL	$t3,$a7,$n0
+	adde	$acc4,$acc4,$t0
+	$UMULH	$t0,$a0,$n0
+	adde	$acc5,$acc5,$t1
+	$UMULH	$t1,$a1,$n0
+	adde	$acc6,$acc6,$t2
+	$UMULH	$t2,$a2,$n0
+	adde	$acc7,$acc7,$t3
+	$UMULH	$t3,$a3,$n0
+	addze	$carry,$carry
+	$STU	$acc0,$SIZE_T($tp)
+	addc	$acc0,$acc1,$t0
+	$UMULH	$t0,$a4,$n0
+	adde	$acc1,$acc2,$t1
+	$UMULH	$t1,$a5,$n0
+	adde	$acc2,$acc3,$t2
+	$UMULH	$t2,$a6,$n0
+	adde	$acc3,$acc4,$t3
+	$UMULH	$t3,$a7,$n0
+	$LDX	$n0,$rp,$cnt
+	adde	$acc4,$acc5,$t0
+	adde	$acc5,$acc6,$t1
+	adde	$acc6,$acc7,$t2
+	adde	$acc7,$carry,$t3
+	#addze	$carry,$zero		# moved above
+	bne	.Lsqr8x_mul
+					# note that carry flag is guaranteed
+					# to be zero at this point
+	$UCMP	$ap,$ap_end		# done yet?
+	beq	.Lsqr8x_break
+
+	$LD	$a0,$SIZE_T*1($tp)
+	$LD	$a1,$SIZE_T*2($tp)
+	$LD	$a2,$SIZE_T*3($tp)
+	$LD	$a3,$SIZE_T*4($tp)
+	$LD	$a4,$SIZE_T*5($tp)
+	$LD	$a5,$SIZE_T*6($tp)
+	$LD	$a6,$SIZE_T*7($tp)
+	$LD	$a7,$SIZE_T*8($tp)
+	addc	$acc0,$acc0,$a0
+	$LD	$a0,$SIZE_T*1($ap)
+	adde	$acc1,$acc1,$a1
+	$LD	$a1,$SIZE_T*2($ap)
+	adde	$acc2,$acc2,$a2
+	$LD	$a2,$SIZE_T*3($ap)
+	adde	$acc3,$acc3,$a3
+	$LD	$a3,$SIZE_T*4($ap)
+	adde	$acc4,$acc4,$a4
+	$LD	$a4,$SIZE_T*5($ap)
+	adde	$acc5,$acc5,$a5
+	$LD	$a5,$SIZE_T*6($ap)
+	adde	$acc6,$acc6,$a6
+	$LD	$a6,$SIZE_T*7($ap)
+	adde	$acc7,$acc7,$a7
+	$LDU	$a7,$SIZE_T*8($ap)
+	#addze	$carry,$zero		# moved above
+	b	.Lsqr8x_mul
+
+.align	5
+.Lsqr8x_break:
+	$LD	$a0,$SIZE_T*8($rp)
+	addi	$ap,$rp,$SIZE_T*15
+	$LD	$a1,$SIZE_T*9($rp)
+	sub.	$t0,$ap_end,$ap		# is it last iteration?
+	$LD	$a2,$SIZE_T*10($rp)
+	sub	$t1,$tp,$t0
+	$LD	$a3,$SIZE_T*11($rp)
+	$LD	$a4,$SIZE_T*12($rp)
+	$LD	$a5,$SIZE_T*13($rp)
+	$LD	$a6,$SIZE_T*14($rp)
+	$LD	$a7,$SIZE_T*15($rp)
+	beq	.Lsqr8x_outer_loop
+
+	$ST	$acc0,$SIZE_T*1($tp)
+	$LD	$acc0,$SIZE_T*1($t1)
+	$ST	$acc1,$SIZE_T*2($tp)
+	$LD	$acc1,$SIZE_T*2($t1)
+	$ST	$acc2,$SIZE_T*3($tp)
+	$LD	$acc2,$SIZE_T*3($t1)
+	$ST	$acc3,$SIZE_T*4($tp)
+	$LD	$acc3,$SIZE_T*4($t1)
+	$ST	$acc4,$SIZE_T*5($tp)
+	$LD	$acc4,$SIZE_T*5($t1)
+	$ST	$acc5,$SIZE_T*6($tp)
+	$LD	$acc5,$SIZE_T*6($t1)
+	$ST	$acc6,$SIZE_T*7($tp)
+	$LD	$acc6,$SIZE_T*7($t1)
+	$ST	$acc7,$SIZE_T*8($tp)
+	$LD	$acc7,$SIZE_T*8($t1)
+	mr	$tp,$t1
+	b	.Lsqr8x_outer_loop
+
+.align	5
+.Lsqr8x_outer_break:
+	####################################################################
+	# Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
+	$LD	$a1,$SIZE_T*1($t0)	# recall that $t0 is &a[-1]
+	$LD	$a3,$SIZE_T*2($t0)
+	$LD	$a5,$SIZE_T*3($t0)
+	$LD	$a7,$SIZE_T*4($t0)
+	addi	$ap,$t0,$SIZE_T*4
+					# "tp[x]" comments are for num==8 case
+	$LD	$t1,$SIZE_T*13($sp)	# =tp[1], t[0] is not interesting
+	$LD	$t2,$SIZE_T*14($sp)
+	$LD	$t3,$SIZE_T*15($sp)
+	$LD	$t0,$SIZE_T*16($sp)
+
+	$ST	$acc0,$SIZE_T*1($tp)	# tp[8]=
+	srwi	$cnt,$num,`log($SIZE_T)/log(2)+2`
+	$ST	$acc1,$SIZE_T*2($tp)
+	subi	$cnt,$cnt,1
+	$ST	$acc2,$SIZE_T*3($tp)
+	$ST	$acc3,$SIZE_T*4($tp)
+	$ST	$acc4,$SIZE_T*5($tp)
+	$ST	$acc5,$SIZE_T*6($tp)
+	$ST	$acc6,$SIZE_T*7($tp)
+	#$ST	$acc7,$SIZE_T*8($tp)	# tp[15] is not interesting
+	addi	$tp,$sp,$SIZE_T*11	# &tp[-1]
+	$UMULL	$acc0,$a1,$a1
+	$UMULH	$a1,$a1,$a1
+	add	$acc1,$t1,$t1		# <<1
+	$SHRI	$t1,$t1,$BITS-1
+	$UMULL	$a2,$a3,$a3
+	$UMULH	$a3,$a3,$a3
+	addc	$acc1,$acc1,$a1
+	add	$acc2,$t2,$t2
+	$SHRI	$t2,$t2,$BITS-1
+	add	$acc3,$t3,$t3
+	$SHRI	$t3,$t3,$BITS-1
+	or	$acc2,$acc2,$t1
+
+	mtctr	$cnt
+.Lsqr4x_shift_n_add:
+	$UMULL	$a4,$a5,$a5
+	$UMULH	$a5,$a5,$a5
+	$LD	$t1,$SIZE_T*6($tp)	# =tp[5]
+	$LD	$a1,$SIZE_T*1($ap)
+	adde	$acc2,$acc2,$a2
+	add	$acc4,$t0,$t0
+	$SHRI	$t0,$t0,$BITS-1
+	or	$acc3,$acc3,$t2
+	$LD	$t2,$SIZE_T*7($tp)	# =tp[6]
+	adde	$acc3,$acc3,$a3
+	$LD	$a3,$SIZE_T*2($ap)
+	add	$acc5,$t1,$t1
+	$SHRI	$t1,$t1,$BITS-1
+	or	$acc4,$acc4,$t3
+	$LD	$t3,$SIZE_T*8($tp)	# =tp[7]
+	$UMULL	$a6,$a7,$a7
+	$UMULH	$a7,$a7,$a7
+	adde	$acc4,$acc4,$a4
+	add	$acc6,$t2,$t2
+	$SHRI	$t2,$t2,$BITS-1
+	or	$acc5,$acc5,$t0
+	$LD	$t0,$SIZE_T*9($tp)	# =tp[8]
+	adde	$acc5,$acc5,$a5
+	$LD	$a5,$SIZE_T*3($ap)
+	add	$acc7,$t3,$t3
+	$SHRI	$t3,$t3,$BITS-1
+	or	$acc6,$acc6,$t1
+	$LD	$t1,$SIZE_T*10($tp)	# =tp[9]
+	$UMULL	$a0,$a1,$a1
+	$UMULH	$a1,$a1,$a1
+	adde	$acc6,$acc6,$a6
+	$ST	$acc0,$SIZE_T*1($tp)	# tp[0]=
+	add	$acc0,$t0,$t0
+	$SHRI	$t0,$t0,$BITS-1
+	or	$acc7,$acc7,$t2
+	$LD	$t2,$SIZE_T*11($tp)	# =tp[10]
+	adde	$acc7,$acc7,$a7
+	$LDU	$a7,$SIZE_T*4($ap)
+	$ST	$acc1,$SIZE_T*2($tp)	# tp[1]=
+	add	$acc1,$t1,$t1
+	$SHRI	$t1,$t1,$BITS-1
+	or	$acc0,$acc0,$t3
+	$LD	$t3,$SIZE_T*12($tp)	# =tp[11]
+	$UMULL	$a2,$a3,$a3
+	$UMULH	$a3,$a3,$a3
+	adde	$acc0,$acc0,$a0
+	$ST	$acc2,$SIZE_T*3($tp)	# tp[2]=
+	add	$acc2,$t2,$t2
+	$SHRI	$t2,$t2,$BITS-1
+	or	$acc1,$acc1,$t0
+	$LD	$t0,$SIZE_T*13($tp)	# =tp[12]
+	adde	$acc1,$acc1,$a1
+	$ST	$acc3,$SIZE_T*4($tp)	# tp[3]=
+	$ST	$acc4,$SIZE_T*5($tp)	# tp[4]=
+	$ST	$acc5,$SIZE_T*6($tp)	# tp[5]=
+	$ST	$acc6,$SIZE_T*7($tp)	# tp[6]=
+	$STU	$acc7,$SIZE_T*8($tp)	# tp[7]=
+	add	$acc3,$t3,$t3
+	$SHRI	$t3,$t3,$BITS-1
+	or	$acc2,$acc2,$t1
+	bdnz	.Lsqr4x_shift_n_add
+___
+my ($np,$np_end)=($ap,$ap_end);
+$code.=<<___;
+	 $POP	$np,$SIZE_T*7($sp)	# pull &np[-1] and n0
+	 $POP	$n0,$SIZE_T*8($sp)
+
+	$UMULL	$a4,$a5,$a5
+	$UMULH	$a5,$a5,$a5
+	$ST	$acc0,$SIZE_T*1($tp)	# tp[8]=
+	 $LD	$acc0,$SIZE_T*12($sp)	# =tp[0]
+	$LD	$t1,$SIZE_T*6($tp)	# =tp[13]
+	adde	$acc2,$acc2,$a2
+	add	$acc4,$t0,$t0
+	$SHRI	$t0,$t0,$BITS-1
+	or	$acc3,$acc3,$t2
+	$LD	$t2,$SIZE_T*7($tp)	# =tp[14]
+	adde	$acc3,$acc3,$a3
+	add	$acc5,$t1,$t1
+	$SHRI	$t1,$t1,$BITS-1
+	or	$acc4,$acc4,$t3
+	$UMULL	$a6,$a7,$a7
+	$UMULH	$a7,$a7,$a7
+	adde	$acc4,$acc4,$a4
+	add	$acc6,$t2,$t2
+	$SHRI	$t2,$t2,$BITS-1
+	or	$acc5,$acc5,$t0
+	$ST	$acc1,$SIZE_T*2($tp)	# tp[9]=
+	 $LD	$acc1,$SIZE_T*13($sp)	# =tp[1]
+	adde	$acc5,$acc5,$a5
+	or	$acc6,$acc6,$t1
+	 $LD	$a0,$SIZE_T*1($np)
+	 $LD	$a1,$SIZE_T*2($np)
+	adde	$acc6,$acc6,$a6
+	 $LD	$a2,$SIZE_T*3($np)
+	 $LD	$a3,$SIZE_T*4($np)
+	adde	$acc7,$a7,$t2
+	 $LD	$a4,$SIZE_T*5($np)
+	 $LD	$a5,$SIZE_T*6($np)
+
+	################################################################
+	# Reduce by 8 limbs per iteration
+	$UMULL	$na0,$n0,$acc0		# t[0]*n0
+	li	$cnt,8
+	$LD	$a6,$SIZE_T*7($np)
+	add	$np_end,$np,$num
+	$LDU	$a7,$SIZE_T*8($np)
+	$ST	$acc2,$SIZE_T*3($tp)	# tp[10]=
+	$LD	$acc2,$SIZE_T*14($sp)
+	$ST	$acc3,$SIZE_T*4($tp)	# tp[11]=
+	$LD	$acc3,$SIZE_T*15($sp)
+	$ST	$acc4,$SIZE_T*5($tp)	# tp[12]=
+	$LD	$acc4,$SIZE_T*16($sp)
+	$ST	$acc5,$SIZE_T*6($tp)	# tp[13]=
+	$LD	$acc5,$SIZE_T*17($sp)
+	$ST	$acc6,$SIZE_T*7($tp)	# tp[14]=
+	$LD	$acc6,$SIZE_T*18($sp)
+	$ST	$acc7,$SIZE_T*8($tp)	# tp[15]=
+	$LD	$acc7,$SIZE_T*19($sp)
+	addi	$tp,$sp,$SIZE_T*11	# &tp[-1]
+	mtctr	$cnt
+	b	.Lsqr8x_reduction
+
+.align	5
+.Lsqr8x_reduction:
+	# (*)	$UMULL	$t0,$a0,$na0	# lo(n[0-7])*lo(t[0]*n0)
+	$UMULL	$t1,$a1,$na0
+	$UMULL	$t2,$a2,$na0
+	$STU	$na0,$SIZE_T($tp)	# put aside t[0]*n0 for tail processing
+	$UMULL	$t3,$a3,$na0
+	# (*)	addc	$acc0,$acc0,$t0
+	addic	$acc0,$acc0,-1		# (*)
+	$UMULL	$t0,$a4,$na0
+	adde	$acc0,$acc1,$t1
+	$UMULL	$t1,$a5,$na0
+	adde	$acc1,$acc2,$t2
+	$UMULL	$t2,$a6,$na0
+	adde	$acc2,$acc3,$t3
+	$UMULL	$t3,$a7,$na0
+	adde	$acc3,$acc4,$t0
+	$UMULH	$t0,$a0,$na0		# hi(n[0-7])*lo(t[0]*n0)
+	adde	$acc4,$acc5,$t1
+	$UMULH	$t1,$a1,$na0
+	adde	$acc5,$acc6,$t2
+	$UMULH	$t2,$a2,$na0
+	adde	$acc6,$acc7,$t3
+	$UMULH	$t3,$a3,$na0
+	addze	$acc7,$zero
+	addc	$acc0,$acc0,$t0
+	$UMULH	$t0,$a4,$na0
+	adde	$acc1,$acc1,$t1
+	$UMULH	$t1,$a5,$na0
+	adde	$acc2,$acc2,$t2
+	$UMULH	$t2,$a6,$na0
+	adde	$acc3,$acc3,$t3
+	$UMULH	$t3,$a7,$na0
+	$UMULL	$na0,$n0,$acc0		# next t[0]*n0
+	adde	$acc4,$acc4,$t0
+	adde	$acc5,$acc5,$t1
+	adde	$acc6,$acc6,$t2
+	adde	$acc7,$acc7,$t3
+	bdnz	.Lsqr8x_reduction
+
+	$LD	$t0,$SIZE_T*1($tp)
+	$LD	$t1,$SIZE_T*2($tp)
+	$LD	$t2,$SIZE_T*3($tp)
+	$LD	$t3,$SIZE_T*4($tp)
+	subi	$rp,$tp,$SIZE_T*7
+	$UCMP	$np_end,$np		# done yet?
+	addc	$acc0,$acc0,$t0
+	$LD	$t0,$SIZE_T*5($tp)
+	adde	$acc1,$acc1,$t1
+	$LD	$t1,$SIZE_T*6($tp)
+	adde	$acc2,$acc2,$t2
+	$LD	$t2,$SIZE_T*7($tp)
+	adde	$acc3,$acc3,$t3
+	$LD	$t3,$SIZE_T*8($tp)
+	adde	$acc4,$acc4,$t0
+	adde	$acc5,$acc5,$t1
+	adde	$acc6,$acc6,$t2
+	adde	$acc7,$acc7,$t3
+	#addze	$carry,$zero		# moved below
+	beq	.Lsqr8x8_post_condition
+
+	$LD	$n0,$SIZE_T*0($rp)
+	$LD	$a0,$SIZE_T*1($np)
+	$LD	$a1,$SIZE_T*2($np)
+	$LD	$a2,$SIZE_T*3($np)
+	$LD	$a3,$SIZE_T*4($np)
+	$LD	$a4,$SIZE_T*5($np)
+	$LD	$a5,$SIZE_T*6($np)
+	$LD	$a6,$SIZE_T*7($np)
+	$LDU	$a7,$SIZE_T*8($np)
+	li	$cnt,0
+
+.align	5
+.Lsqr8x_tail:
+	$UMULL	$t0,$a0,$n0
+	addze	$carry,$zero		# carry bit, modulo-scheduled
+	$UMULL	$t1,$a1,$n0
+	addi	$cnt,$cnt,$SIZE_T
+	$UMULL	$t2,$a2,$n0
+	andi.	$cnt,$cnt,$SIZE_T*8-1
+	$UMULL	$t3,$a3,$n0
+	addc	$acc0,$acc0,$t0
+	$UMULL	$t0,$a4,$n0
+	adde	$acc1,$acc1,$t1
+	$UMULL	$t1,$a5,$n0
+	adde	$acc2,$acc2,$t2
+	$UMULL	$t2,$a6,$n0
+	adde	$acc3,$acc3,$t3
+	$UMULL	$t3,$a7,$n0
+	adde	$acc4,$acc4,$t0
+	$UMULH	$t0,$a0,$n0
+	adde	$acc5,$acc5,$t1
+	$UMULH	$t1,$a1,$n0
+	adde	$acc6,$acc6,$t2
+	$UMULH	$t2,$a2,$n0
+	adde	$acc7,$acc7,$t3
+	$UMULH	$t3,$a3,$n0
+	addze	$carry,$carry
+	$STU	$acc0,$SIZE_T($tp)
+	addc	$acc0,$acc1,$t0
+	$UMULH	$t0,$a4,$n0
+	adde	$acc1,$acc2,$t1
+	$UMULH	$t1,$a5,$n0
+	adde	$acc2,$acc3,$t2
+	$UMULH	$t2,$a6,$n0
+	adde	$acc3,$acc4,$t3
+	$UMULH	$t3,$a7,$n0
+	$LDX	$n0,$rp,$cnt
+	adde	$acc4,$acc5,$t0
+	adde	$acc5,$acc6,$t1
+	adde	$acc6,$acc7,$t2
+	adde	$acc7,$carry,$t3
+	#addze	$carry,$zero		# moved above
+	bne	.Lsqr8x_tail
+					# note that carry flag is guaranteed
+					# to be zero at this point
+	$LD	$a0,$SIZE_T*1($tp)
+	$POP	$carry,$SIZE_T*10($sp)	# pull top-most carry in case we break
+	$UCMP	$np_end,$np		# done yet?
+	$LD	$a1,$SIZE_T*2($tp)
+	sub	$t2,$np_end,$num	# rewinded np
+	$LD	$a2,$SIZE_T*3($tp)
+	$LD	$a3,$SIZE_T*4($tp)
+	$LD	$a4,$SIZE_T*5($tp)
+	$LD	$a5,$SIZE_T*6($tp)
+	$LD	$a6,$SIZE_T*7($tp)
+	$LD	$a7,$SIZE_T*8($tp)
+	beq	.Lsqr8x_tail_break
+
+	addc	$acc0,$acc0,$a0
+	$LD	$a0,$SIZE_T*1($np)
+	adde	$acc1,$acc1,$a1
+	$LD	$a1,$SIZE_T*2($np)
+	adde	$acc2,$acc2,$a2
+	$LD	$a2,$SIZE_T*3($np)
+	adde	$acc3,$acc3,$a3
+	$LD	$a3,$SIZE_T*4($np)
+	adde	$acc4,$acc4,$a4
+	$LD	$a4,$SIZE_T*5($np)
+	adde	$acc5,$acc5,$a5
+	$LD	$a5,$SIZE_T*6($np)
+	adde	$acc6,$acc6,$a6
+	$LD	$a6,$SIZE_T*7($np)
+	adde	$acc7,$acc7,$a7
+	$LDU	$a7,$SIZE_T*8($np)
+	#addze	$carry,$zero		# moved above
+	b	.Lsqr8x_tail
+
+.align	5
+.Lsqr8x_tail_break:
+	$POP	$n0,$SIZE_T*8($sp)	# pull n0
+	$POP	$t3,$SIZE_T*9($sp)	# &tp[2*num-1]
+	addi	$cnt,$tp,$SIZE_T*8	# end of current t[num] window
+
+	addic	$carry,$carry,-1	# "move" top-most carry to carry bit
+	adde	$t0,$acc0,$a0
+	$LD	$acc0,$SIZE_T*8($rp)
+	$LD	$a0,$SIZE_T*1($t2)	# recall that $t2 is &n[-1]
+	adde	$t1,$acc1,$a1
+	$LD	$acc1,$SIZE_T*9($rp)
+	$LD	$a1,$SIZE_T*2($t2)
+	adde	$acc2,$acc2,$a2
+	$LD	$a2,$SIZE_T*3($t2)
+	adde	$acc3,$acc3,$a3
+	$LD	$a3,$SIZE_T*4($t2)
+	adde	$acc4,$acc4,$a4
+	$LD	$a4,$SIZE_T*5($t2)
+	adde	$acc5,$acc5,$a5
+	$LD	$a5,$SIZE_T*6($t2)
+	adde	$acc6,$acc6,$a6
+	$LD	$a6,$SIZE_T*7($t2)
+	adde	$acc7,$acc7,$a7
+	$LD	$a7,$SIZE_T*8($t2)
+	addi	$np,$t2,$SIZE_T*8
+	addze	$t2,$zero		# top-most carry
+	$UMULL	$na0,$n0,$acc0
+	$ST	$t0,$SIZE_T*1($tp)
+	$UCMP	$cnt,$t3		# did we hit the bottom?
+	$ST	$t1,$SIZE_T*2($tp)
+	li	$cnt,8
+	$ST	$acc2,$SIZE_T*3($tp)
+	$LD	$acc2,$SIZE_T*10($rp)
+	$ST	$acc3,$SIZE_T*4($tp)
+	$LD	$acc3,$SIZE_T*11($rp)
+	$ST	$acc4,$SIZE_T*5($tp)
+	$LD	$acc4,$SIZE_T*12($rp)
+	$ST	$acc5,$SIZE_T*6($tp)
+	$LD	$acc5,$SIZE_T*13($rp)
+	$ST	$acc6,$SIZE_T*7($tp)
+	$LD	$acc6,$SIZE_T*14($rp)
+	$ST	$acc7,$SIZE_T*8($tp)
+	$LD	$acc7,$SIZE_T*15($rp)
+	$PUSH	$t2,$SIZE_T*10($sp)	# off-load top-most carry
+	addi	$tp,$rp,$SIZE_T*7	# slide the window
+	mtctr	$cnt
+	bne	.Lsqr8x_reduction
+
+	################################################################
+	# Final step. We see if result is larger than modulus, and
+	# if it is, subtract the modulus. But comparison implies
+	# subtraction. So we subtract modulus, see if it borrowed,
+	# and conditionally copy original value.
+	$POP	$rp,$SIZE_T*6($sp)	# pull &rp[-1]
+	srwi	$cnt,$num,`log($SIZE_T)/log(2)+3`
+	mr	$n0,$tp			# put tp aside
+	addi	$tp,$tp,$SIZE_T*8
+	subi	$cnt,$cnt,1
+	subfc	$t0,$a0,$acc0
+	subfe	$t1,$a1,$acc1
+	mr	$carry,$t2
+	mr	$ap_end,$rp		# $rp copy
+
+	mtctr	$cnt
+	b	.Lsqr8x_sub
+
+.align	5
+.Lsqr8x_sub:
+	$LD	$a0,$SIZE_T*1($np)
+	$LD	$acc0,$SIZE_T*1($tp)
+	$LD	$a1,$SIZE_T*2($np)
+	$LD	$acc1,$SIZE_T*2($tp)
+	subfe	$t2,$a2,$acc2
+	$LD	$a2,$SIZE_T*3($np)
+	$LD	$acc2,$SIZE_T*3($tp)
+	subfe	$t3,$a3,$acc3
+	$LD	$a3,$SIZE_T*4($np)
+	$LD	$acc3,$SIZE_T*4($tp)
+	$ST	$t0,$SIZE_T*1($rp)
+	subfe	$t0,$a4,$acc4
+	$LD	$a4,$SIZE_T*5($np)
+	$LD	$acc4,$SIZE_T*5($tp)
+	$ST	$t1,$SIZE_T*2($rp)
+	subfe	$t1,$a5,$acc5
+	$LD	$a5,$SIZE_T*6($np)
+	$LD	$acc5,$SIZE_T*6($tp)
+	$ST	$t2,$SIZE_T*3($rp)
+	subfe	$t2,$a6,$acc6
+	$LD	$a6,$SIZE_T*7($np)
+	$LD	$acc6,$SIZE_T*7($tp)
+	$ST	$t3,$SIZE_T*4($rp)
+	subfe	$t3,$a7,$acc7
+	$LDU	$a7,$SIZE_T*8($np)
+	$LDU	$acc7,$SIZE_T*8($tp)
+	$ST	$t0,$SIZE_T*5($rp)
+	subfe	$t0,$a0,$acc0
+	$ST	$t1,$SIZE_T*6($rp)
+	subfe	$t1,$a1,$acc1
+	$ST	$t2,$SIZE_T*7($rp)
+	$STU	$t3,$SIZE_T*8($rp)
+	bdnz	.Lsqr8x_sub
+
+	srwi	$cnt,$num,`log($SIZE_T)/log(2)+2`
+	 $LD	$a0,$SIZE_T*1($ap_end)	# original $rp
+	 $LD	$acc0,$SIZE_T*1($n0)	# original $tp
+	subi	$cnt,$cnt,1
+	 $LD	$a1,$SIZE_T*2($ap_end)
+	 $LD	$acc1,$SIZE_T*2($n0)
+	subfe	$t2,$a2,$acc2
+	 $LD	$a2,$SIZE_T*3($ap_end)
+	 $LD	$acc2,$SIZE_T*3($n0)
+	subfe	$t3,$a3,$acc3
+	 $LD	$a3,$SIZE_T*4($ap_end)
+	 $LDU	$acc3,$SIZE_T*4($n0)
+	$ST	$t0,$SIZE_T*1($rp)
+	subfe	$t0,$a4,$acc4
+	$ST	$t1,$SIZE_T*2($rp)
+	subfe	$t1,$a5,$acc5
+	$ST	$t2,$SIZE_T*3($rp)
+	subfe	$t2,$a6,$acc6
+	$ST	$t3,$SIZE_T*4($rp)
+	subfe	$t3,$a7,$acc7
+	$ST	$t0,$SIZE_T*5($rp)
+	subfe	$carry,$zero,$carry	# did it borrow?
+	$ST	$t1,$SIZE_T*6($rp)
+	$ST	$t2,$SIZE_T*7($rp)
+	$ST	$t3,$SIZE_T*8($rp)
+
+	addi	$tp,$sp,$SIZE_T*11
+	mtctr	$cnt
+
+.Lsqr4x_cond_copy:
+	andc	$a0,$a0,$carry
+	 $ST	$zero,-$SIZE_T*3($n0)	# wipe stack clean
+	and	$acc0,$acc0,$carry
+	 $ST	$zero,-$SIZE_T*2($n0)
+	andc	$a1,$a1,$carry
+	 $ST	$zero,-$SIZE_T*1($n0)
+	and	$acc1,$acc1,$carry
+	 $ST	$zero,-$SIZE_T*0($n0)
+	andc	$a2,$a2,$carry
+	 $ST	$zero,$SIZE_T*1($tp)
+	and	$acc2,$acc2,$carry
+	 $ST	$zero,$SIZE_T*2($tp)
+	andc	$a3,$a3,$carry
+	 $ST	$zero,$SIZE_T*3($tp)
+	and	$acc3,$acc3,$carry
+	 $STU	$zero,$SIZE_T*4($tp)
+	or	$t0,$a0,$acc0
+	$LD	$a0,$SIZE_T*5($ap_end)
+	$LD	$acc0,$SIZE_T*1($n0)
+	or	$t1,$a1,$acc1
+	$LD	$a1,$SIZE_T*6($ap_end)
+	$LD	$acc1,$SIZE_T*2($n0)
+	or	$t2,$a2,$acc2
+	$LD	$a2,$SIZE_T*7($ap_end)
+	$LD	$acc2,$SIZE_T*3($n0)
+	or	$t3,$a3,$acc3
+	$LD	$a3,$SIZE_T*8($ap_end)
+	$LDU	$acc3,$SIZE_T*4($n0)
+	$ST	$t0,$SIZE_T*1($ap_end)
+	$ST	$t1,$SIZE_T*2($ap_end)
+	$ST	$t2,$SIZE_T*3($ap_end)
+	$STU	$t3,$SIZE_T*4($ap_end)
+	bdnz	.Lsqr4x_cond_copy
+
+	$POP	$ap,0($sp)		# pull saved sp
+	andc	$a0,$a0,$carry
+	and	$acc0,$acc0,$carry
+	andc	$a1,$a1,$carry
+	and	$acc1,$acc1,$carry
+	andc	$a2,$a2,$carry
+	and	$acc2,$acc2,$carry
+	andc	$a3,$a3,$carry
+	and	$acc3,$acc3,$carry
+	or	$t0,$a0,$acc0
+	or	$t1,$a1,$acc1
+	or	$t2,$a2,$acc2
+	or	$t3,$a3,$acc3
+	$ST	$t0,$SIZE_T*1($ap_end)
+	$ST	$t1,$SIZE_T*2($ap_end)
+	$ST	$t2,$SIZE_T*3($ap_end)
+	$ST	$t3,$SIZE_T*4($ap_end)
+
+	b	.Lsqr8x_done
+
+.align	5
+.Lsqr8x8_post_condition:
+	$POP	$rp,$SIZE_T*6($sp)	# pull rp
+	$POP	$ap,0($sp)		# pull saved sp
+	addze	$carry,$zero
+
+	# $acc0-7,$carry hold result, $a0-7 hold modulus
+	subfc	$acc0,$a0,$acc0
+	subfe	$acc1,$a1,$acc1
+	 $ST	$zero,$SIZE_T*12($sp)	# wipe stack clean
+	 $ST	$zero,$SIZE_T*13($sp)
+	subfe	$acc2,$a2,$acc2
+	 $ST	$zero,$SIZE_T*14($sp)
+	 $ST	$zero,$SIZE_T*15($sp)
+	subfe	$acc3,$a3,$acc3
+	 $ST	$zero,$SIZE_T*16($sp)
+	 $ST	$zero,$SIZE_T*17($sp)
+	subfe	$acc4,$a4,$acc4
+	 $ST	$zero,$SIZE_T*18($sp)
+	 $ST	$zero,$SIZE_T*19($sp)
+	subfe	$acc5,$a5,$acc5
+	 $ST	$zero,$SIZE_T*20($sp)
+	 $ST	$zero,$SIZE_T*21($sp)
+	subfe	$acc6,$a6,$acc6
+	 $ST	$zero,$SIZE_T*22($sp)
+	 $ST	$zero,$SIZE_T*23($sp)
+	subfe	$acc7,$a7,$acc7
+	 $ST	$zero,$SIZE_T*24($sp)
+	 $ST	$zero,$SIZE_T*25($sp)
+	subfe	$carry,$zero,$carry	# did it borrow?
+	 $ST	$zero,$SIZE_T*26($sp)
+	 $ST	$zero,$SIZE_T*27($sp)
+
+	and	$a0,$a0,$carry
+	and	$a1,$a1,$carry
+	addc	$acc0,$acc0,$a0		# add modulus back if borrowed
+	and	$a2,$a2,$carry
+	adde	$acc1,$acc1,$a1
+	and	$a3,$a3,$carry
+	adde	$acc2,$acc2,$a2
+	and	$a4,$a4,$carry
+	adde	$acc3,$acc3,$a3
+	and	$a5,$a5,$carry
+	adde	$acc4,$acc4,$a4
+	and	$a6,$a6,$carry
+	adde	$acc5,$acc5,$a5
+	and	$a7,$a7,$carry
+	adde	$acc6,$acc6,$a6
+	adde	$acc7,$acc7,$a7
+	$ST	$acc0,$SIZE_T*1($rp)
+	$ST	$acc1,$SIZE_T*2($rp)
+	$ST	$acc2,$SIZE_T*3($rp)
+	$ST	$acc3,$SIZE_T*4($rp)
+	$ST	$acc4,$SIZE_T*5($rp)
+	$ST	$acc5,$SIZE_T*6($rp)
+	$ST	$acc6,$SIZE_T*7($rp)
+	$ST	$acc7,$SIZE_T*8($rp)
+
+.Lsqr8x_done:
+	$PUSH	$zero,$SIZE_T*8($sp)
+	$PUSH	$zero,$SIZE_T*10($sp)
+
+	$POP	r14,-$SIZE_T*18($ap)
+	li	r3,1			# signal "done"
+	$POP	r15,-$SIZE_T*17($ap)
+	$POP	r16,-$SIZE_T*16($ap)
+	$POP	r17,-$SIZE_T*15($ap)
+	$POP	r18,-$SIZE_T*14($ap)
+	$POP	r19,-$SIZE_T*13($ap)
+	$POP	r20,-$SIZE_T*12($ap)
+	$POP	r21,-$SIZE_T*11($ap)
+	$POP	r22,-$SIZE_T*10($ap)
+	$POP	r23,-$SIZE_T*9($ap)
+	$POP	r24,-$SIZE_T*8($ap)
+	$POP	r25,-$SIZE_T*7($ap)
+	$POP	r26,-$SIZE_T*6($ap)
+	$POP	r27,-$SIZE_T*5($ap)
+	$POP	r28,-$SIZE_T*4($ap)
+	$POP	r29,-$SIZE_T*3($ap)
+	$POP	r30,-$SIZE_T*2($ap)
+	$POP	r31,-$SIZE_T*1($ap)
+	mr	$sp,$ap
+	blr
+	.long	0
+	.byte	0,12,4,0x20,0x80,18,6,0
+	.long	0
+.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
+___
+}
+$code.=<<___;
 .asciz  "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 ___
 
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc.pl b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
index 4ea534a1c7..e37068192f 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -38,9 +38,9 @@
 #rsa 2048 bits   0.3036s   0.0085s      3.3    117.1
 #rsa 4096 bits   2.0040s   0.0299s      0.5     33.4
 #dsa  512 bits   0.0087s   0.0106s    114.3     94.5
-#dsa 1024 bits   0.0256s   0.0313s     39.0     32.0	
+#dsa 1024 bits   0.0256s   0.0313s     39.0     32.0
 #
-#	Same bechmark with this assembler code:
+#	Same benchmark with this assembler code:
 #
 #rsa  512 bits   0.0056s   0.0005s    178.6   2049.2
 #rsa 1024 bits   0.0283s   0.0015s     35.3    674.1
@@ -74,7 +74,7 @@
 #rsa 4096 bits   0.3700s   0.0058s      2.7    171.0
 #dsa  512 bits   0.0016s   0.0020s    610.7    507.1
 #dsa 1024 bits   0.0047s   0.0058s    212.5    173.2
-#	
+#
 #	Again, performance increases by at about 75%
 #
 #       Mac OS X, Apple G5 1.8GHz (Note this is 32 bit code)
@@ -101,10 +101,7 @@
 #dsa 2048 bits   0.0061s   0.0075s    163.5    132.8
 #
 #        Performance increase of ~60%
-#
-#	If you have comments or suggestions to improve code send
-#	me a note at schari@us.ibm.com
-#
+#        Based on submission from Suresh N. Chari of IBM
 
 $flavour = shift;
 
@@ -125,7 +122,7 @@ if ($flavour =~ /32/) {
 	$CNTLZ=	"cntlzw";	# count leading zeros
 	$SHL=	"slw";		# shift left
 	$SHR=	"srw";		# unsigned shift right
-	$SHRI=	"srwi";		# unsigned shift right by immediate	
+	$SHRI=	"srwi";		# unsigned shift right by immediate
 	$SHLI=	"slwi";		# shift left by immediate
 	$CLRU=	"clrlwi";	# clear upper bits
 	$INSR=	"insrwi";	# insert right
@@ -149,10 +146,10 @@ if ($flavour =~ /32/) {
 	$CNTLZ=	"cntlzd";	# count leading zeros
 	$SHL=	"sld";		# shift left
 	$SHR=	"srd";		# unsigned shift right
-	$SHRI=	"srdi";		# unsigned shift right by immediate	
+	$SHRI=	"srdi";		# unsigned shift right by immediate
 	$SHLI=	"sldi";		# shift left by immediate
 	$CLRU=	"clrldi";	# clear upper bits
-	$INSR=	"insrdi";	# insert right 
+	$INSR=	"insrdi";	# insert right
 	$ROTL=	"rotldi";	# rotate left by immediate
 	$TR=	"td";		# conditional trap
 } else { die "nonsense $flavour"; }
@@ -189,7 +186,7 @@ $data=<<EOF;
 #	   below.
 #				12/05/03		Suresh Chari
 #			(with lots of help from)        Andy Polyakov
-##	
+##
 #	1. Initial version	10/20/02		Suresh Chari
 #
 #
@@ -202,7 +199,7 @@ $data=<<EOF;
 #		be done in the build process.
 #
 #	Hand optimized assembly code for the following routines
-#	
+#
 #	bn_sqr_comba4
 #	bn_sqr_comba8
 #	bn_mul_comba4
@@ -225,10 +222,10 @@ $data=<<EOF;
 #--------------------------------------------------------------------------
 #
 #	Defines to be used in the assembly code.
-#	
+#
 #.set r0,0	# we use it as storage for value of 0
 #.set SP,1	# preserved
-#.set RTOC,2	# preserved 
+#.set RTOC,2	# preserved
 #.set r3,3	# 1st argument/return value
 #.set r4,4	# 2nd argument/volatile register
 #.set r5,5	# 3rd argument/volatile register
@@ -246,7 +243,7 @@ $data=<<EOF;
 #	        the first . i.e. for example change ".bn_sqr_comba4"
 #		to "bn_sqr_comba4". This should be automatically done
 #		in the build.
-	
+
 	.globl	.bn_sqr_comba4
 	.globl	.bn_sqr_comba8
 	.globl	.bn_mul_comba4
@@ -257,9 +254,9 @@ $data=<<EOF;
 	.globl	.bn_sqr_words
 	.globl	.bn_mul_words
 	.globl	.bn_mul_add_words
-	
+
 # .text section
-	
+
 	.machine	"any"
 
 #
@@ -278,8 +275,8 @@ $data=<<EOF;
 # r3 contains r
 # r4 contains a
 #
-# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:	
-# 
+# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
+#
 # r5,r6 are the two BN_ULONGs being multiplied.
 # r7,r8 are the results of the 32x32 giving 64 bit multiply.
 # r9,r10, r11 are the equivalents of c1,c2, c3.
@@ -288,10 +285,10 @@ $data=<<EOF;
 #
 	xor		r0,r0,r0		# set r0 = 0. Used in the addze
 						# instructions below
-	
+
 						#sqr_add_c(a,0,c1,c2,c3)
-	$LD		r5,`0*$BNSZ`(r4)		
-	$UMULL		r9,r5,r5		
+	$LD		r5,`0*$BNSZ`(r4)
+	$UMULL		r9,r5,r5
 	$UMULH		r10,r5,r5		#in first iteration. No need
 						#to add since c1=c2=c3=0.
 						# Note c3(r11) is NOT set to 0
@@ -299,20 +296,20 @@ $data=<<EOF;
 
 	$ST		r9,`0*$BNSZ`(r3)	# r[0]=c1;
 						# sqr_add_c2(a,1,0,c2,c3,c1);
-	$LD		r6,`1*$BNSZ`(r4)		
+	$LD		r6,`1*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-					
+
 	addc		r7,r7,r7		# compute (r7,r8)=2*(r7,r8)
 	adde		r8,r8,r8
 	addze		r9,r0			# catch carry if any.
-						# r9= r0(=0) and carry 
-	
+						# r9= r0(=0) and carry
+
 	addc		r10,r7,r10		# now add to temp result.
-	addze		r11,r8                  # r8 added to r11 which is 0 
+	addze		r11,r8                  # r8 added to r11 which is 0
 	addze		r9,r9
-	
-	$ST		r10,`1*$BNSZ`(r3)	#r[1]=c2; 
+
+	$ST		r10,`1*$BNSZ`(r3)	#r[1]=c2;
 						#sqr_add_c(a,1,c3,c1,c2)
 	$UMULL		r7,r6,r6
 	$UMULH		r8,r6,r6
@@ -323,23 +320,23 @@ $data=<<EOF;
 	$LD		r6,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r7,r7,r7
 	adde		r8,r8,r8
 	addze		r10,r10
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	$ST		r11,`2*$BNSZ`(r3)	#r[2]=c3 
+	$ST		r11,`2*$BNSZ`(r3)	#r[2]=c3
 						#sqr_add_c2(a,3,0,c1,c2,c3);
-	$LD		r6,`3*$BNSZ`(r4)		
+	$LD		r6,`3*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
 	addc		r7,r7,r7
 	adde		r8,r8,r8
 	addze		r11,r0
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
@@ -348,7 +345,7 @@ $data=<<EOF;
 	$LD		r6,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r7,r7,r7
 	adde		r8,r8,r8
 	addze		r11,r11
@@ -363,31 +360,31 @@ $data=<<EOF;
 	adde		r11,r8,r11
 	addze		r9,r0
 						#sqr_add_c2(a,3,1,c2,c3,c1);
-	$LD		r6,`3*$BNSZ`(r4)		
+	$LD		r6,`3*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
 	addc		r7,r7,r7
 	adde		r8,r8,r8
 	addze		r9,r9
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
 	$ST		r10,`4*$BNSZ`(r3)	#r[4]=c2
 						#sqr_add_c2(a,3,2,c3,c1,c2);
-	$LD		r5,`2*$BNSZ`(r4)		
+	$LD		r5,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
 	addc		r7,r7,r7
 	adde		r8,r8,r8
 	addze		r10,r0
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
 	$ST		r11,`5*$BNSZ`(r3)	#r[5] = c3
 						#sqr_add_c(a,3,c1,c2,c3);
-	$UMULL		r7,r6,r6		
+	$UMULL		r7,r6,r6
 	$UMULH		r8,r6,r6
 	addc		r9,r7,r9
 	adde		r10,r8,r10
@@ -406,7 +403,7 @@ $data=<<EOF;
 #		for the gcc compiler. This should be automatically
 #		done in the build
 #
-	
+
 .align	4
 .bn_sqr_comba8:
 #
@@ -418,15 +415,15 @@ $data=<<EOF;
 # r3 contains r
 # r4 contains a
 #
-# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:	
-# 
+# Freely use registers r5,r6,r7,r8,r9,r10,r11 as follows:
+#
 # r5,r6 are the two BN_ULONGs being multiplied.
 # r7,r8 are the results of the 32x32 giving 64 bit multiply.
 # r9,r10, r11 are the equivalents of c1,c2, c3.
 #
 # Possible optimization of loading all 8 longs of a into registers
 # doesn't provide any speedup
-# 
+#
 
 	xor		r0,r0,r0		#set r0 = 0.Used in addze
 						#instructions below.
@@ -439,18 +436,18 @@ $data=<<EOF;
 						#sqr_add_c2(a,1,0,c2,c3,c1);
 	$LD		r6,`1*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
-	$UMULH		r8,r5,r6	
-	
+	$UMULH		r8,r5,r6
+
 	addc		r10,r7,r10		#add the two register number
 	adde		r11,r8,r0 		# (r8,r7) to the three register
 	addze		r9,r0			# number (r9,r11,r10).NOTE:r0=0
-	
+
 	addc		r10,r7,r10		#add the two register number
 	adde		r11,r8,r11 		# (r8,r7) to the three register
 	addze		r9,r9			# number (r9,r11,r10).
-	
+
 	$ST		r10,`1*$BNSZ`(r3)	# r[1]=c2
-				
+
 						#sqr_add_c(a,1,c3,c1,c2);
 	$UMULL		r7,r6,r6
 	$UMULH		r8,r6,r6
@@ -461,25 +458,25 @@ $data=<<EOF;
 	$LD		r6,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	
+
 	$ST		r11,`2*$BNSZ`(r3)	#r[2]=c3
 						#sqr_add_c2(a,3,0,c1,c2,c3);
 	$LD		r6,`3*$BNSZ`(r4)	#r6 = a[3]. r5 is already a[0].
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r0
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
@@ -488,20 +485,20 @@ $data=<<EOF;
 	$LD		r6,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
-	
+
 	$ST		r9,`3*$BNSZ`(r3)	#r[3]=c1;
 						#sqr_add_c(a,2,c2,c3,c1);
 	$UMULL		r7,r6,r6
 	$UMULH		r8,r6,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r0
@@ -509,11 +506,11 @@ $data=<<EOF;
 	$LD		r6,`3*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
@@ -522,11 +519,11 @@ $data=<<EOF;
 	$LD		r6,`4*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
@@ -535,11 +532,11 @@ $data=<<EOF;
 	$LD		r6,`5*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r0
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
@@ -548,11 +545,11 @@ $data=<<EOF;
 	$LD		r6,`4*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
@@ -561,11 +558,11 @@ $data=<<EOF;
 	$LD		r6,`3*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
@@ -580,11 +577,11 @@ $data=<<EOF;
 	$LD		r6,`4*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
@@ -593,11 +590,11 @@ $data=<<EOF;
 	$LD		r6,`5*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r11
@@ -617,7 +614,7 @@ $data=<<EOF;
 	$LD		r6,`7*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r0
@@ -629,7 +626,7 @@ $data=<<EOF;
 	$LD		r6,`6*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
@@ -652,7 +649,7 @@ $data=<<EOF;
 	$LD		r6,`4*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r10,r7,r10
 	adde		r11,r8,r11
 	addze		r9,r9
@@ -684,7 +681,7 @@ $data=<<EOF;
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
-	
+
 	addc		r11,r7,r11
 	adde		r9,r8,r9
 	addze		r10,r10
@@ -704,7 +701,7 @@ $data=<<EOF;
 	$LD		r5,`2*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
 	$UMULH		r8,r5,r6
-	
+
 	addc		r9,r7,r9
 	adde		r10,r8,r10
 	addze		r11,r0
@@ -801,7 +798,7 @@ $data=<<EOF;
 	adde		r10,r8,r10
 	addze		r11,r11
 	$ST		r9,`12*$BNSZ`(r3)	#r[12]=c1;
-	
+
 						#sqr_add_c2(a,7,6,c2,c3,c1)
 	$LD		r5,`6*$BNSZ`(r4)
 	$UMULL		r7,r5,r6
@@ -850,21 +847,21 @@ $data=<<EOF;
 #
 	xor	r0,r0,r0		#r0=0. Used in addze below.
 					#mul_add_c(a[0],b[0],c1,c2,c3);
-	$LD	r6,`0*$BNSZ`(r4)		
-	$LD	r7,`0*$BNSZ`(r5)		
-	$UMULL	r10,r6,r7		
-	$UMULH	r11,r6,r7		
+	$LD	r6,`0*$BNSZ`(r4)
+	$LD	r7,`0*$BNSZ`(r5)
+	$UMULL	r10,r6,r7
+	$UMULH	r11,r6,r7
 	$ST	r10,`0*$BNSZ`(r3)	#r[0]=c1
 					#mul_add_c(a[0],b[1],c2,c3,c1);
-	$LD	r7,`1*$BNSZ`(r5)		
+	$LD	r7,`1*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r11,r8,r11
 	adde	r12,r9,r0
 	addze	r10,r0
 					#mul_add_c(a[1],b[0],c2,c3,c1);
-	$LD	r6, `1*$BNSZ`(r4)		
-	$LD	r7, `0*$BNSZ`(r5)		
+	$LD	r6, `1*$BNSZ`(r4)
+	$LD	r7, `0*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r11,r8,r11
@@ -872,23 +869,23 @@ $data=<<EOF;
 	addze	r10,r10
 	$ST	r11,`1*$BNSZ`(r3)	#r[1]=c2
 					#mul_add_c(a[2],b[0],c3,c1,c2);
-	$LD	r6,`2*$BNSZ`(r4)		
+	$LD	r6,`2*$BNSZ`(r4)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r12,r8,r12
 	adde	r10,r9,r10
 	addze	r11,r0
 					#mul_add_c(a[1],b[1],c3,c1,c2);
-	$LD	r6,`1*$BNSZ`(r4)		
-	$LD	r7,`1*$BNSZ`(r5)		
+	$LD	r6,`1*$BNSZ`(r4)
+	$LD	r7,`1*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r12,r8,r12
 	adde	r10,r9,r10
 	addze	r11,r11
 					#mul_add_c(a[0],b[2],c3,c1,c2);
-	$LD	r6,`0*$BNSZ`(r4)		
-	$LD	r7,`2*$BNSZ`(r5)		
+	$LD	r6,`0*$BNSZ`(r4)
+	$LD	r7,`2*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r12,r8,r12
@@ -896,7 +893,7 @@ $data=<<EOF;
 	addze	r11,r11
 	$ST	r12,`2*$BNSZ`(r3)	#r[2]=c3
 					#mul_add_c(a[0],b[3],c1,c2,c3);
-	$LD	r7,`3*$BNSZ`(r5)		
+	$LD	r7,`3*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r10,r8,r10
@@ -928,7 +925,7 @@ $data=<<EOF;
 	addze	r12,r12
 	$ST	r10,`3*$BNSZ`(r3)	#r[3]=c1
 					#mul_add_c(a[3],b[1],c2,c3,c1);
-	$LD	r7,`1*$BNSZ`(r5)		
+	$LD	r7,`1*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r11,r8,r11
@@ -952,7 +949,7 @@ $data=<<EOF;
 	addze	r10,r10
 	$ST	r11,`4*$BNSZ`(r3)	#r[4]=c2
 					#mul_add_c(a[2],b[3],c3,c1,c2);
-	$LD	r6,`2*$BNSZ`(r4)		
+	$LD	r6,`2*$BNSZ`(r4)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r12,r8,r12
@@ -968,7 +965,7 @@ $data=<<EOF;
 	addze	r11,r11
 	$ST	r12,`5*$BNSZ`(r3)	#r[5]=c3
 					#mul_add_c(a[3],b[3],c1,c2,c3);
-	$LD	r7,`3*$BNSZ`(r5)		
+	$LD	r7,`3*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
 	addc	r10,r8,r10
@@ -988,7 +985,7 @@ $data=<<EOF;
 #		for the gcc compiler. This should be automatically
 #		done in the build
 #
-	
+
 .align	4
 .bn_mul_comba8:
 #
@@ -1003,7 +1000,7 @@ $data=<<EOF;
 # r10, r11, r12 are the equivalents of c1, c2, and c3.
 #
 	xor	r0,r0,r0		#r0=0. Used in addze below.
-	
+
 					#mul_add_c(a[0],b[0],c1,c2,c3);
 	$LD	r6,`0*$BNSZ`(r4)	#a[0]
 	$LD	r7,`0*$BNSZ`(r5)	#b[0]
@@ -1065,7 +1062,7 @@ $data=<<EOF;
 	addc	r10,r10,r8
 	adde	r11,r11,r9
 	addze	r12,r12
-		
+
 					#mul_add_c(a[2],b[1],c1,c2,c3);
 	$LD	r6,`2*$BNSZ`(r4)
 	$LD	r7,`1*$BNSZ`(r5)
@@ -1131,7 +1128,7 @@ $data=<<EOF;
 	adde	r10,r10,r9
 	addze	r11,r0
 					#mul_add_c(a[1],b[4],c3,c1,c2);
-	$LD	r6,`1*$BNSZ`(r4)		
+	$LD	r6,`1*$BNSZ`(r4)
 	$LD	r7,`4*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
@@ -1139,7 +1136,7 @@ $data=<<EOF;
 	adde	r10,r10,r9
 	addze	r11,r11
 					#mul_add_c(a[2],b[3],c3,c1,c2);
-	$LD	r6,`2*$BNSZ`(r4)		
+	$LD	r6,`2*$BNSZ`(r4)
 	$LD	r7,`3*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
@@ -1147,7 +1144,7 @@ $data=<<EOF;
 	adde	r10,r10,r9
 	addze	r11,r11
 					#mul_add_c(a[3],b[2],c3,c1,c2);
-	$LD	r6,`3*$BNSZ`(r4)		
+	$LD	r6,`3*$BNSZ`(r4)
 	$LD	r7,`2*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
@@ -1155,7 +1152,7 @@ $data=<<EOF;
 	adde	r10,r10,r9
 	addze	r11,r11
 					#mul_add_c(a[4],b[1],c3,c1,c2);
-	$LD	r6,`4*$BNSZ`(r4)		
+	$LD	r6,`4*$BNSZ`(r4)
 	$LD	r7,`1*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
@@ -1163,7 +1160,7 @@ $data=<<EOF;
 	adde	r10,r10,r9
 	addze	r11,r11
 					#mul_add_c(a[5],b[0],c3,c1,c2);
-	$LD	r6,`5*$BNSZ`(r4)		
+	$LD	r6,`5*$BNSZ`(r4)
 	$LD	r7,`0*$BNSZ`(r5)
 	$UMULL	r8,r6,r7
 	$UMULH	r9,r6,r7
@@ -1555,7 +1552,7 @@ $data=<<EOF;
 	addi	r3,r3,-$BNSZ
 	addi	r5,r5,-$BNSZ
 	mtctr	r6
-Lppcasm_sub_mainloop:	
+Lppcasm_sub_mainloop:
 	$LDU	r7,$BNSZ(r4)
 	$LDU	r8,$BNSZ(r5)
 	subfe	r6,r8,r7	# r6 = r7+carry bit + onescomplement(r8)
@@ -1563,7 +1560,7 @@ Lppcasm_sub_mainloop:
 				# is r7-r8 -1 as we need.
 	$STU	r6,$BNSZ(r3)
 	bdnz	Lppcasm_sub_mainloop
-Lppcasm_sub_adios:	
+Lppcasm_sub_adios:
 	subfze	r3,r0		# if carry bit is set then r3 = 0 else -1
 	andi.	r3,r3,1         # keep only last bit.
 	blr
@@ -1604,13 +1601,13 @@ Lppcasm_sub_adios:
 	addi	r3,r3,-$BNSZ
 	addi	r5,r5,-$BNSZ
 	mtctr	r6
-Lppcasm_add_mainloop:	
+Lppcasm_add_mainloop:
 	$LDU	r7,$BNSZ(r4)
 	$LDU	r8,$BNSZ(r5)
 	adde	r8,r7,r8
 	$STU	r8,$BNSZ(r3)
 	bdnz	Lppcasm_add_mainloop
-Lppcasm_add_adios:	
+Lppcasm_add_adios:
 	addze	r3,r0			#return carry bit.
 	blr
 	.long	0
@@ -1633,11 +1630,11 @@ Lppcasm_add_adios:
 #	the PPC instruction to count leading zeros instead
 #	of call to num_bits_word. Since this was compiled
 #	only at level -O2 we can possibly squeeze it more?
-#	
+#
 #	r3 = h
 #	r4 = l
 #	r5 = d
-	
+
 	$UCMPI	0,r5,0			# compare r5 and 0
 	bne	Lppcasm_div1		# proceed if d!=0
 	li	r3,-1			# d=0 return -1
@@ -1653,7 +1650,7 @@ Lppcasm_div1:
 Lppcasm_div2:
 	$UCMP	0,r3,r5			#h>=d?
 	blt	Lppcasm_div3		#goto Lppcasm_div3 if not
-	subf	r3,r5,r3		#h-=d ; 
+	subf	r3,r5,r3		#h-=d ;
 Lppcasm_div3:				#r7 = BN_BITS2-i. so r7=i
 	cmpi	0,0,r7,0		# is (i == 0)?
 	beq	Lppcasm_div4
@@ -1668,7 +1665,7 @@ Lppcasm_div4:
 					# as it saves registers.
 	li	r6,2			#r6=2
 	mtctr	r6			#counter will be in count.
-Lppcasm_divouterloop: 
+Lppcasm_divouterloop:
 	$SHRI	r8,r3,`$BITS/2`		#r8 = (h>>BN_BITS4)
 	$SHRI	r11,r4,`$BITS/2`	#r11= (l&BN_MASK2h)>>BN_BITS4
 					# compute here for innerloop.
@@ -1676,7 +1673,7 @@ Lppcasm_divouterloop:
 	bne	Lppcasm_div5		# goto Lppcasm_div5 if not
 
 	li	r8,-1
-	$CLRU	r8,r8,`$BITS/2`		#q = BN_MASK2l 
+	$CLRU	r8,r8,`$BITS/2`		#q = BN_MASK2l
 	b	Lppcasm_div6
 Lppcasm_div5:
 	$UDIV	r8,r3,r9		#q = h/dh
@@ -1684,7 +1681,7 @@ Lppcasm_div6:
 	$UMULL	r12,r9,r8		#th = q*dh
 	$CLRU	r10,r5,`$BITS/2`	#r10=dl
 	$UMULL	r6,r8,r10		#tl = q*dl
-	
+
 Lppcasm_divinnerloop:
 	subf	r10,r12,r3		#t = h -th
 	$SHRI	r7,r10,`$BITS/2`	#r7= (t &BN_MASK2H), sort of...
@@ -1761,7 +1758,7 @@ Lppcasm_div9:
 	addi	r4,r4,-$BNSZ
 	addi	r3,r3,-$BNSZ
 	mtctr	r5
-Lppcasm_sqr_mainloop:	
+Lppcasm_sqr_mainloop:
 					#sqr(r[0],r[1],a[0]);
 	$LDU	r6,$BNSZ(r4)
 	$UMULL	r7,r6,r6
@@ -1769,7 +1766,7 @@ Lppcasm_sqr_mainloop:
 	$STU	r7,$BNSZ(r3)
 	$STU	r8,$BNSZ(r3)
 	bdnz	Lppcasm_sqr_mainloop
-Lppcasm_sqr_adios:	
+Lppcasm_sqr_adios:
 	blr
 	.long	0
 	.byte	0,12,0x14,0,0,0,3,0
@@ -1783,7 +1780,7 @@ Lppcasm_sqr_adios:
 #		done in the build
 #
 
-.align	4	
+.align	4
 .bn_mul_words:
 #
 # BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
@@ -1797,7 +1794,7 @@ Lppcasm_sqr_adios:
 	rlwinm.	r7,r5,30,2,31		# num >> 2
 	beq	Lppcasm_mw_REM
 	mtctr	r7
-Lppcasm_mw_LOOP:	
+Lppcasm_mw_LOOP:
 					#mul(rp[0],ap[0],w,c1);
 	$LD	r8,`0*$BNSZ`(r4)
 	$UMULL	r9,r6,r8
@@ -1809,7 +1806,7 @@ Lppcasm_mw_LOOP:
 					#using adde.
 	$ST	r9,`0*$BNSZ`(r3)
 					#mul(rp[1],ap[1],w,c1);
-	$LD	r8,`1*$BNSZ`(r4)	
+	$LD	r8,`1*$BNSZ`(r4)
 	$UMULL	r11,r6,r8
 	$UMULH  r12,r6,r8
 	adde	r11,r11,r10
@@ -1830,7 +1827,7 @@ Lppcasm_mw_LOOP:
 	addze	r12,r12			#this spin we collect carry into
 					#r12
 	$ST	r11,`3*$BNSZ`(r3)
-	
+
 	addi	r3,r3,`4*$BNSZ`
 	addi	r4,r4,`4*$BNSZ`
 	bdnz	Lppcasm_mw_LOOP
@@ -1846,25 +1843,25 @@ Lppcasm_mw_REM:
 	addze	r10,r10
 	$ST	r9,`0*$BNSZ`(r3)
 	addi	r12,r10,0
-	
+
 	addi	r5,r5,-1
 	cmpli	0,0,r5,0
 	beq	Lppcasm_mw_OVER
 
-	
+
 					#mul(rp[1],ap[1],w,c1);
-	$LD	r8,`1*$BNSZ`(r4)	
+	$LD	r8,`1*$BNSZ`(r4)
 	$UMULL	r9,r6,r8
 	$UMULH  r10,r6,r8
 	addc	r9,r9,r12
 	addze	r10,r10
 	$ST	r9,`1*$BNSZ`(r3)
 	addi	r12,r10,0
-	
+
 	addi	r5,r5,-1
 	cmpli	0,0,r5,0
 	beq	Lppcasm_mw_OVER
-	
+
 					#mul_add(rp[2],ap[2],w,c1);
 	$LD	r8,`2*$BNSZ`(r4)
 	$UMULL	r9,r6,r8
@@ -1873,14 +1870,14 @@ Lppcasm_mw_REM:
 	addze	r10,r10
 	$ST	r9,`2*$BNSZ`(r3)
 	addi	r12,r10,0
-		
-Lppcasm_mw_OVER:	
+
+Lppcasm_mw_OVER:
 	addi	r3,r12,0
 	blr
 	.long	0
 	.byte	0,12,0x14,0,0,0,4,0
 	.long	0
-.size	bn_mul_words,.-bn_mul_words
+.size	.bn_mul_words,.-.bn_mul_words
 
 #
 #	NOTE:	The following label name should be changed to
@@ -1902,11 +1899,11 @@ Lppcasm_mw_OVER:
 # empirical evidence suggests that unrolled version performs best!!
 #
 	xor	r0,r0,r0		#r0 = 0
-	xor	r12,r12,r12  		#r12 = 0 . used for carry		
+	xor	r12,r12,r12  		#r12 = 0 . used for carry
 	rlwinm.	r7,r5,30,2,31		# num >> 2
 	beq	Lppcasm_maw_leftover	# if (num < 4) go LPPCASM_maw_leftover
 	mtctr	r7
-Lppcasm_maw_mainloop:	
+Lppcasm_maw_mainloop:
 					#mul_add(rp[0],ap[0],w,c1);
 	$LD	r8,`0*$BNSZ`(r4)
 	$LD	r11,`0*$BNSZ`(r3)
@@ -1922,9 +1919,9 @@ Lppcasm_maw_mainloop:
 					#by multiply and will be collected
 					#in the next spin
 	$ST	r9,`0*$BNSZ`(r3)
-	
+
 					#mul_add(rp[1],ap[1],w,c1);
-	$LD	r8,`1*$BNSZ`(r4)	
+	$LD	r8,`1*$BNSZ`(r4)
 	$LD	r9,`1*$BNSZ`(r3)
 	$UMULL	r11,r6,r8
 	$UMULH  r12,r6,r8
@@ -1933,7 +1930,7 @@ Lppcasm_maw_mainloop:
 	addc	r11,r11,r9
 	#addze	r12,r12
 	$ST	r11,`1*$BNSZ`(r3)
-	
+
 					#mul_add(rp[2],ap[2],w,c1);
 	$LD	r8,`2*$BNSZ`(r4)
 	$UMULL	r9,r6,r8
@@ -1944,7 +1941,7 @@ Lppcasm_maw_mainloop:
 	addc	r9,r9,r11
 	#addze	r10,r10
 	$ST	r9,`2*$BNSZ`(r3)
-	
+
 					#mul_add(rp[3],ap[3],w,c1);
 	$LD	r8,`3*$BNSZ`(r4)
 	$UMULL	r11,r6,r8
@@ -1958,7 +1955,7 @@ Lppcasm_maw_mainloop:
 	addi	r3,r3,`4*$BNSZ`
 	addi	r4,r4,`4*$BNSZ`
 	bdnz	Lppcasm_maw_mainloop
-	
+
 Lppcasm_maw_leftover:
 	andi.	r5,r5,0x3
 	beq	Lppcasm_maw_adios
@@ -1975,10 +1972,10 @@ Lppcasm_maw_leftover:
 	addc	r9,r9,r12
 	addze	r12,r10
 	$ST	r9,0(r3)
-	
+
 	bdz	Lppcasm_maw_adios
 					#mul_add(rp[1],ap[1],w,c1);
-	$LDU	r8,$BNSZ(r4)	
+	$LDU	r8,$BNSZ(r4)
 	$UMULL	r9,r6,r8
 	$UMULH  r10,r6,r8
 	$LDU	r11,$BNSZ(r3)
@@ -1987,7 +1984,7 @@ Lppcasm_maw_leftover:
 	addc	r9,r9,r12
 	addze	r12,r10
 	$ST	r9,0(r3)
-	
+
 	bdz	Lppcasm_maw_adios
 					#mul_add(rp[2],ap[2],w,c1);
 	$LDU	r8,$BNSZ(r4)
@@ -1999,8 +1996,8 @@ Lppcasm_maw_leftover:
 	addc	r9,r9,r12
 	addze	r12,r10
 	$ST	r9,0(r3)
-		
-Lppcasm_maw_adios:	
+
+Lppcasm_maw_adios:
 	addi	r3,r12,0
 	blr
 	.long	0
diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
index 5d9f43aa5d..c41b620bc2 100644
--- a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl
@@ -35,7 +35,7 @@
 # key lengths. As it's obviously inappropriate as "best all-round"
 # alternative, it has to be complemented with run-time CPU family
 # detection. Oh! It should also be noted that unlike other PowerPC
-# implementation IALU ppc-mont.pl module performs *suboptimaly* on
+# implementation IALU ppc-mont.pl module performs *suboptimally* on
 # >=1024-bit key lengths on Power 6. It should also be noted that
 # *everything* said so far applies to 64-bit builds! As far as 32-bit
 # application executed on 64-bit CPU goes, this module is likely to
@@ -1353,7 +1353,7 @@ $code.=<<___;
 	std	$t3,-16($tp)		; tp[j-1]
 	std	$t5,-8($tp)		; tp[j]
 
-	add	$carry,$carry,$ovf	; comsume upmost overflow
+	add	$carry,$carry,$ovf	; consume upmost overflow
 	add	$t6,$t6,$carry		; can not overflow
 	srdi	$carry,$t6,16
 	add	$t7,$t7,$carry
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
index 0466e11a25..f1292cc75c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -1,68 +1,30 @@
 #! /usr/bin/env perl
 # Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
 # in the file LICENSE in the source distribution or at
 # https://www.openssl.org/source/license.html
-
-
-##############################################################################
-#                                                                            #
-#  Copyright (c) 2012, Intel Corporation                                     #
-#                                                                            #
-#  All rights reserved.                                                      #
-#                                                                            #
-#  Redistribution and use in source and binary forms, with or without        #
-#  modification, are permitted provided that the following conditions are    #
-#  met:                                                                      #
-#                                                                            #
-#  *  Redistributions of source code must retain the above copyright         #
-#     notice, this list of conditions and the following disclaimer.          #
-#                                                                            #
-#  *  Redistributions in binary form must reproduce the above copyright      #
-#     notice, this list of conditions and the following disclaimer in the    #
-#     documentation and/or other materials provided with the                 #
-#     distribution.                                                          #
-#                                                                            #
-#  *  Neither the name of the Intel Corporation nor the names of its         #
-#     contributors may be used to endorse or promote products derived from   #
-#     this software without specific prior written permission.               #
-#                                                                            #
-#                                                                            #
-#  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          #
-#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         #
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        #
-#  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            #
-#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
-#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       #
-#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        #
-#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    #
-#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      #
-#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        #
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              #
-#                                                                            #
-##############################################################################
-# Developers and authors:                                                    #
-# Shay Gueron (1, 2), and Vlad Krasnov (1)                                   #
-# (1) Intel Corporation, Israel Development Center, Haifa, Israel            #
-# (2) University of Haifa, Israel                                            #
-##############################################################################
-# Reference:                                                                 #
-# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular             #
-#     Exponentiation,  Using Advanced Vector Instructions Architectures",    #
-#     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,   #
-#     pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012              #
-# [2] S. Gueron: "Efficient Software Implementations of Modular              #
-#     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).  #
-# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE         #
-#     Proceedings of 9th International Conference on Information Technology: #
-#     New Generations (ITNG 2012), pp.821-823 (2012)                         #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    #
-#     resistant 1024-bit modular exponentiation, for optimizing RSA2048      #
-#     on AVX2 capable x86_64 platforms",                                     #
-#     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest#
-##############################################################################
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular
+#     Exponentiation,  Using Advanced Vector Instructions Architectures",
+#     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,
+#     pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012
+# [2] S. Gueron: "Efficient Software Implementations of Modular
+#     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).
+# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE
+#     Proceedings of 9th International Conference on Information Technology:
+#     New Generations (ITNG 2012), pp.821-823 (2012)
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+#     resistant 1024-bit modular exponentiation, for optimizing RSA2048
+#     on AVX2 capable x86_64 platforms",
+#     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest
 #
 # +13% improvement over original submission by <appro@openssl.org>
 #
@@ -168,13 +130,21 @@ $code.=<<___;
 .type	rsaz_1024_sqr_avx2,\@function,5
 .align	64
 rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -193,6 +163,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	mov	%rdx, $np			# reassigned argument
 	sub	\$$FrameSize, %rsp
 	mov	$np, $tmp
@@ -382,7 +353,7 @@ $code.=<<___;
 	vpaddq		$TEMP1, $ACC1, $ACC1
 	vpmuludq	32*7-128($aap), $B2, $ACC2
 	 vpbroadcastq	32*5-128($tpa), $B2
-	vpaddq		32*11-448($tp1), $ACC2, $ACC2	
+	vpaddq		32*11-448($tp1), $ACC2, $ACC2
 
 	vmovdqu		$ACC6, 32*6-192($tp0)
 	vmovdqu		$ACC7, 32*7-192($tp0)
@@ -441,7 +412,7 @@ $code.=<<___;
 	vmovdqu		$ACC7, 32*16-448($tp1)
 	lea		8($tp1), $tp1
 
-	dec	$i        
+	dec	$i
 	jnz	.LOOP_SQR_1024
 ___
 $ZERO = $ACC9;
@@ -786,7 +757,7 @@ $code.=<<___;
 	vpblendd	\$3, $TEMP4, $TEMP5, $TEMP4
 	vpaddq		$TEMP3, $ACC7, $ACC7
 	vpaddq		$TEMP4, $ACC8, $ACC8
-     
+
 	vpsrlq		\$29, $ACC4, $TEMP1
 	vpand		$AND_MASK, $ACC4, $ACC4
 	vpsrlq		\$29, $ACC5, $TEMP2
@@ -825,8 +796,10 @@ $code.=<<___;
 
 	vzeroall
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
+.Lsqr_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -840,14 +813,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
 ___
 }
@@ -900,13 +881,21 @@ $code.=<<___;
 .type	rsaz_1024_mul_avx2,\@function,5
 .align	64
 rsaz_1024_mul_avx2:
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	vzeroupper
@@ -925,6 +914,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	vzeroall
 	mov	%rdx, $bp	# reassigned argument
 	sub	\$64,%rsp
@@ -1450,15 +1440,17 @@ $code.=<<___;
 	vpaddq		$TEMP4, $ACC8, $ACC8
 
 	vmovdqu		$ACC4, 128-128($rp)
-	vmovdqu		$ACC5, 160-128($rp)    
+	vmovdqu		$ACC5, 160-128($rp)
 	vmovdqu		$ACC6, 192-128($rp)
 	vmovdqu		$ACC7, 224-128($rp)
 	vmovdqu		$ACC8, 256-128($rp)
 	vzeroupper
 
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
+.Lmul_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -1472,14 +1464,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
 ___
 }
@@ -1598,8 +1598,10 @@ rsaz_1024_scatter5_avx2:
 .type	rsaz_1024_gather5_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_gather5_avx2:
+.cfi_startproc
 	vzeroupper
 	mov	%rsp,%r11
+.cfi_def_cfa_register	%r11
 ___
 $code.=<<___ if ($win64);
 	lea	-0x88(%rsp),%rax
@@ -1737,11 +1739,13 @@ $code.=<<___ if ($win64);
 	movaps	-0x38(%r11),%xmm13
 	movaps	-0x28(%r11),%xmm14
 	movaps	-0x18(%r11),%xmm15
-.LSEH_end_rsaz_1024_gather5:
 ___
 $code.=<<___;
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 	ret
+.cfi_endproc
+.LSEH_end_rsaz_1024_gather5:
 .size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
 ___
 }
@@ -1814,14 +1818,17 @@ rsaz_se_handler:
 	cmp	%r10,%rbx		# context->Rip<prologue label
 	jb	.Lcommon_seh_tail
 
-	mov	152($context),%rax	# pull context->Rsp
-
 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lcommon_seh_tail
 
-	mov	160($context),%rax	# pull context->Rbp
+	mov	160($context),%rbp	# pull context->Rbp
+
+	mov	8(%r11),%r10d		# HandlerData[2]
+	lea	(%rsi,%r10),%r10	# "in tail" label
+	cmp	%r10,%rbx		# context->Rip>="in tail" label
+	cmovc	%rbp,%rax
 
 	mov	-48(%rax),%r15
 	mov	-40(%rax),%r14
@@ -1899,11 +1906,13 @@ rsaz_se_handler:
 .LSEH_info_rsaz_1024_sqr_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue
+	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail
+	.long	0
 .LSEH_info_rsaz_1024_mul_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lmul_1024_body,.Lmul_1024_epilogue
+	.rva	.Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail
+	.long	0
 .LSEH_info_rsaz_1024_gather5:
 	.byte	0x01,0x36,0x17,0x0b
 	.byte	0x36,0xf8,0x09,0x00	# vmovaps 0x90(rsp),xmm15
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
index 6f3b664f7a..b1797b649f 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl
@@ -1,68 +1,29 @@
 #! /usr/bin/env perl
 # Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
 # in the file LICENSE in the source distribution or at
 # https://www.openssl.org/source/license.html
-
-
-##############################################################################
-#                                                                            #
-#  Copyright (c) 2012, Intel Corporation                                     #
-#                                                                            #
-#  All rights reserved.                                                      #
-#                                                                            #
-#  Redistribution and use in source and binary forms, with or without        #
-#  modification, are permitted provided that the following conditions are    #
-#  met:                                                                      #
-#                                                                            #
-#  *  Redistributions of source code must retain the above copyright         #
-#     notice, this list of conditions and the following disclaimer.          #
-#                                                                            #
-#  *  Redistributions in binary form must reproduce the above copyright      #
-#     notice, this list of conditions and the following disclaimer in the    #
-#     documentation and/or other materials provided with the                 #
-#     distribution.                                                          #
-#                                                                            #
-#  *  Neither the name of the Intel Corporation nor the names of its         #
-#     contributors may be used to endorse or promote products derived from   #
-#     this software without specific prior written permission.               #
-#                                                                            #
-#                                                                            #
-#  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          #
-#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         #
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        #
-#  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            #
-#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
-#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       #
-#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        #
-#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    #
-#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      #
-#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        #
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              #
-#                                                                            #
-##############################################################################
-# Developers and authors:                                                    #
-# Shay Gueron (1, 2), and Vlad Krasnov (1)                                   #
-# (1) Intel Architecture Group, Microprocessor and Chipset Development,      #
-#     Israel Development Center, Haifa, Israel                               #
-# (2) University of Haifa                                                    #
-##############################################################################
-# Reference:                                                                 #
-# [1] S. Gueron, "Efficient Software Implementations of Modular              #
-#     Exponentiation", http://eprint.iacr.org/2011/239                       #
-# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".             #
-#     IEEE Proceedings of 9th International Conference on Information        #
-#     Technology: New Generations (ITNG 2012), 821-823 (2012).               #
-# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation#
-#     Journal of Cryptographic Engineering 2:31-43 (2012).                   #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    #
-#     resistant 512-bit and 1024-bit modular exponentiation for optimizing   #
-#     RSA1024 and RSA2048 on x86_64 platforms",                              #
-#     http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest#
-##############################################################################
-
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, "Efficient Software Implementations of Modular
+#     Exponentiation", http://eprint.iacr.org/2011/239
+# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".
+#     IEEE Proceedings of 9th International Conference on Information
+#     Technology: New Generations (ITNG 2012), 821-823 (2012).
+# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation
+#     Journal of Cryptographic Engineering 2:31-43 (2012).
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+#     resistant 512-bit and 1024-bit modular exponentiation for optimizing
+#     RSA1024 and RSA2048 on x86_64 platforms",
+#     http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest
+#
 # While original submission covers 512- and 1024-bit exponentiation,
 # this module is limited to 512-bit version only (and as such
 # accelerates RSA1024 sign). This is because improvement for longer
@@ -138,14 +99,22 @@ $code.=<<___;
 .type	rsaz_512_sqr,\@function,5
 .align	32
 rsaz_512_sqr:				# 25-29% faster than rsaz_512_mul
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lsqr_body:
 	movq	$mod, %rbp		# common argument
 	movq	($inp), %rdx
@@ -282,9 +251,9 @@ $code.=<<___;
 	movq	%r9, 16(%rsp)
 	movq	%r10, 24(%rsp)
 	shrq	\$63, %rbx
-	
+
 #third iteration
-	movq	16($inp), %r9	
+	movq	16($inp), %r9
 	movq	24($inp), %rax
 	mulq	%r9
 	addq	%rax, %r12
@@ -532,7 +501,7 @@ $code.=<<___;
 	movl	$times,128+8(%rsp)
 	movq	$out, %xmm0		# off-load
 	movq	%rbp, %xmm1		# off-load
-#first iteration	
+#first iteration
 	mulx	%rax, %r8, %r9
 
 	mulx	16($inp), %rcx, %r10
@@ -568,7 +537,7 @@ $code.=<<___;
 	mov	%rax, (%rsp)
 	mov	%r8, 8(%rsp)
 
-#second iteration	
+#second iteration
 	mulx	16($inp), %rax, %rbx
 	adox	%rax, %r10
 	adcx	%rbx, %r11
@@ -607,8 +576,8 @@ $code.=<<___;
 
 	mov	%r9, 16(%rsp)
 	.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00		# mov	%r10, 24(%rsp)
-	
-#third iteration	
+
+#third iteration
 	.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00	# mulx	24($inp), $out, %r9
 	adox	$out, %r12
 	adcx	%r9, %r13
@@ -643,8 +612,8 @@ $code.=<<___;
 
 	mov	%r11, 32(%rsp)
 	.byte	0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00		# mov	%r12, 40(%rsp)
-	
-#fourth iteration	
+
+#fourth iteration
 	.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00	# mulx	32($inp), %rax, %rbx
 	adox	%rax, %r14
 	adcx	%rbx, %r15
@@ -676,8 +645,8 @@ $code.=<<___;
 
 	mov	%r13, 48(%rsp)
 	mov	%r14, 56(%rsp)
-	
-#fifth iteration	
+
+#fifth iteration
 	.byte	0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00	# mulx	40($inp), $out, %r11
 	adox	$out, %r8
 	adcx	%r11, %r9
@@ -704,8 +673,8 @@ $code.=<<___;
 
 	mov	%r15, 64(%rsp)
 	mov	%r8, 72(%rsp)
-	
-#sixth iteration	
+
+#sixth iteration
 	.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	# mulx	48($inp), %rax, %rbx
 	adox	%rax, %r10
 	adcx	%rbx, %r11
@@ -800,15 +769,24 @@ ___
 $code.=<<___;
 
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_sqr,.-rsaz_512_sqr
 ___
 }
@@ -819,14 +797,22 @@ $code.=<<___;
 .type	rsaz_512_mul,\@function,5
 .align	32
 rsaz_512_mul:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_body:
 	movq	$out, %xmm0		# off-load arguments
 	movq	$mod, %xmm1
@@ -896,15 +882,24 @@ $code.=<<___;
 	call	__rsaz_512_subtract
 
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul,.-rsaz_512_mul
 ___
 }
@@ -915,14 +910,22 @@ $code.=<<___;
 .type	rsaz_512_mul_gather4,\@function,6
 .align	32
 rsaz_512_mul_gather4:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	subq	\$`128+24+($win64?0xb0:0)`, %rsp
+.cfi_adjust_cfa_offset	`128+24+($win64?0xb0:0)`
 ___
 $code.=<<___	if ($win64);
 	movaps	%xmm6,0xa0(%rsp)
@@ -1048,7 +1051,7 @@ $code.=<<___;
 	movq	56($ap), %rax
 	movq	%rdx, %r14
 	adcq	\$0, %r14
-	
+
 	mulq	%rbx
 	addq	%rax, %r14
 	 movq	($ap), %rax
@@ -1150,7 +1153,7 @@ $code.=<<___;
 	 movq	($ap), %rax
 	adcq	\$0, %rdx
 	addq	%r15, %r14
-	movq	%rdx, %r15	
+	movq	%rdx, %r15
 	adcq	\$0, %r15
 
 	leaq	8(%rdi), %rdi
@@ -1212,7 +1215,7 @@ $code.=<<___ if ($addx);
 
 	mulx	48($ap), %rbx, %r14
 	adcx	%rax, %r12
-	
+
 	mulx	56($ap), %rax, %r15
 	adcx	%rbx, %r13
 	adcx	%rax, %r14
@@ -1348,15 +1351,24 @@ $code.=<<___	if ($win64);
 	lea	0xb0(%rax),%rax
 ___
 $code.=<<___;
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_gather4_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
 ___
 }
@@ -1367,15 +1379,23 @@ $code.=<<___;
 .type	rsaz_512_mul_scatter4,\@function,6
 .align	32
 rsaz_512_mul_scatter4:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	mov	$pwr, $pwr
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_scatter4_body:
 	leaq	($tbl,$pwr,8), $tbl
 	movq	$out, %xmm0		# off-load arguments
@@ -1411,7 +1431,7 @@ $code.=<<___;
 ___
 $code.=<<___ if ($addx);
 	jmp	.Lmul_scatter_tail
-	
+
 .align	32
 .Lmulx_scatter:
 	movq	($out), %rdx		# pass b[0]
@@ -1458,15 +1478,24 @@ $code.=<<___;
 	movq	%r15, 128*7($inp)
 
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_scatter4_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
 ___
 }
@@ -1477,14 +1506,22 @@ $code.=<<___;
 .type	rsaz_512_mul_by_one,\@function,4
 .align	32
 rsaz_512_mul_by_one:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	subq	\$128+24, %rsp
+.cfi_adjust_cfa_offset	128+24
 .Lmul_by_one_body:
 ___
 $code.=<<___ if ($addx);
@@ -1539,15 +1576,24 @@ $code.=<<___;
 	movq	%r15, 56($out)
 
 	leaq	128+24+48(%rsp), %rax
+.cfi_def_cfa	%rax,8
 	movq	-48(%rax), %r15
+.cfi_restore	%r15
 	movq	-40(%rax), %r14
+.cfi_restore	%r14
 	movq	-32(%rax), %r13
+.cfi_restore	%r13
 	movq	-24(%rax), %r12
+.cfi_restore	%r12
 	movq	-16(%rax), %rbp
+.cfi_restore	%rbp
 	movq	-8(%rax), %rbx
+.cfi_restore	%rbx
 	leaq	(%rax), %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_by_one_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
 ___
 }
@@ -1824,7 +1870,7 @@ __rsaz_512_mul:
 	movq	56($ap), %rax
 	movq	%rdx, %r14
 	adcq	\$0, %r14
-	
+
 	mulq	%rbx
 	addq	%rax, %r14
 	 movq	($ap), %rax
@@ -1901,7 +1947,7 @@ __rsaz_512_mul:
 	 movq	($ap), %rax
 	adcq	\$0, %rdx
 	addq	%r15, %r14
-	movq	%rdx, %r15	
+	movq	%rdx, %r15
 	adcq	\$0, %r15
 
 	leaq	8(%rdi), %rdi
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
index cbd16f4214..06181bf9b9 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-gf2m.pl
@@ -20,7 +20,7 @@
 # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
 # the time being... gcc 4.3 appeared to generate poor code, therefore
 # the effort. And indeed, the module delivers 55%-90%(*) improvement
-# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
+# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
 # key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
 # This is for 64-bit build. In 32-bit "highgprs" case improvement is
 # even higher, for example on z990 it was measured 80%-150%. ECDSA
@@ -198,7 +198,7 @@ $code.=<<___;
 	xgr	$hi,@r[1]
 	xgr	$lo,@r[0]
 	xgr	$hi,@r[2]
-	xgr	$lo,@r[3]	
+	xgr	$lo,@r[3]
 	xgr	$hi,@r[3]
 	xgr	$lo,$hi
 	stg	$hi,16($rp)
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
index 66780cdf80..c2fc5adffe 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
index 4f339b2279..fcae9cfc5b 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl
@@ -8,9 +8,9 @@
 
 
 # ====================================================================
-# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
-# <appro@openssl.org>. The module is licensed under 2-clause BSD
-# license. November 2012. All rights reserved.
+# Written by David S. Miller and Andy Polyakov
+# The module is licensed under 2-clause BSD license.
+# November 2012. All rights reserved.
 # ====================================================================
 
 ######################################################################
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
index 9c31073b24..75d72eb92c 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8.S
@@ -1,5 +1,5 @@
 .ident	"sparcv8.s, Version 1.4"
-.ident	"SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident	"SPARC v8 ISA artwork by Andy Polyakov <appro@openssl.org>"
 
 /*
  * ====================================================================
@@ -13,7 +13,7 @@
  */
 
 /*
- * This is my modest contributon to OpenSSL project (see
+ * This is my modest contribution to OpenSSL project (see
  * http://www.openssl.org/ for more information about it) and is
  * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
  * module. For updates see http://fy.chalmers.se/~appro/hpe/.
@@ -159,12 +159,12 @@ bn_mul_add_words:
  */
 bn_mul_words:
 	cmp	%o2,0
-	bg,a	.L_bn_mul_words_proceeed
+	bg,a	.L_bn_mul_words_proceed
 	ld	[%o1],%g2
 	retl
 	clr	%o0
 
-.L_bn_mul_words_proceeed:
+.L_bn_mul_words_proceed:
 	andcc	%o2,-4,%g0
 	bz	.L_bn_mul_words_tail
 	clr	%o5
@@ -251,12 +251,12 @@ bn_mul_words:
  */
 bn_sqr_words:
 	cmp	%o2,0
-	bg,a	.L_bn_sqr_words_proceeed
+	bg,a	.L_bn_sqr_words_proceed
 	ld	[%o1],%g2
 	retl
 	clr	%o0
 
-.L_bn_sqr_words_proceeed:
+.L_bn_sqr_words_proceed:
 	andcc	%o2,-4,%g0
 	bz	.L_bn_sqr_words_tail
 	clr	%o5
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
index 714a136675..fe4699b2bd 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv8plus.S
@@ -1,5 +1,5 @@
 .ident	"sparcv8plus.s, Version 1.4"
-.ident	"SPARC v9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.ident	"SPARC v9 ISA artwork by Andy Polyakov <appro@openssl.org>"
 
 /*
  * ====================================================================
@@ -13,7 +13,7 @@
  */
 
 /*
- * This is my modest contributon to OpenSSL project (see
+ * This is my modest contribution to OpenSSL project (see
  * http://www.openssl.org/ for more information about it) and is
  * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
  * module. For updates see http://fy.chalmers.se/~appro/hpe/.
@@ -144,10 +144,6 @@
  *	    }
  */
 
-#ifdef OPENSSL_FIPSCANISTER
-#include <openssl/fipssyms.h>
-#endif
-
 #if defined(__SUNPRO_C) && defined(__sparcv9)
   /* They've said -xarch=v9 at command line */
   .register	%g2,#scratch
@@ -282,7 +278,7 @@ bn_mul_add_words:
  */
 bn_mul_words:
 	sra	%o2,%g0,%o2	! signx %o2
-	brgz,a	%o2,.L_bn_mul_words_proceeed
+	brgz,a	%o2,.L_bn_mul_words_proceed
 	lduw	[%o1],%g2
 	retl
 	clr	%o0
@@ -290,7 +286,7 @@ bn_mul_words:
 	nop
 	nop
 
-.L_bn_mul_words_proceeed:
+.L_bn_mul_words_proceed:
 	srl	%o3,%g0,%o3	! clruw	%o3
 	andcc	%o2,-4,%g0
 	bz,pn	%icc,.L_bn_mul_words_tail
@@ -370,7 +366,7 @@ bn_mul_words:
  */
 bn_sqr_words:
 	sra	%o2,%g0,%o2	! signx %o2
-	brgz,a	%o2,.L_bn_sqr_words_proceeed
+	brgz,a	%o2,.L_bn_sqr_words_proceed
 	lduw	[%o1],%g2
 	retl
 	clr	%o0
@@ -378,7 +374,7 @@ bn_sqr_words:
 	nop
 	nop
 
-.L_bn_sqr_words_proceeed:
+.L_bn_sqr_words_proceed:
 	andcc	%o2,-4,%g0
 	nop
 	bz,pn	%icc,.L_bn_sqr_words_tail
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
index 074f9df14b..b41903af98 100644
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -612,7 +612,7 @@ $code.=<<___;
 	add	$tp,8,$tp
 .type	$fname,#function
 .size	$fname,(.-$fname)
-.asciz	"Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz	"Montgomery Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
 .align	32
 ___
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
index 50b690653f..c8f759df9f 100755
--- a/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9a-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -865,7 +865,7 @@ $fname:
 	restore
 .type   $fname,#function
 .size	$fname,(.-$fname)
-.asciz	"Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
+.asciz	"Montgomery Multiplication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
 .align	32
 ___
 
diff --git a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
index 9d65a146a2..9cf717e841 100644
--- a/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/via-mont.pl
@@ -8,7 +8,7 @@
 
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -76,7 +76,7 @@
 # dsa 1024 bits 0.001346s 0.001595s    742.7    627.0
 # dsa 2048 bits 0.004745s 0.005582s    210.7    179.1
 #
-# Conclusions: 
+# Conclusions:
 # - VIA SDK leaves a *lot* of room for improvement (which this
 #   implementation successfully fills:-);
 # - 'rep montmul' gives up to >3x performance improvement depending on
@@ -91,7 +91,7 @@ require "x86asm.pl";
 $output = pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],"via-mont.pl");
+&asm_init($ARGV[0]);
 
 # int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 $func="bn_mul_mont_padlock";
diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
index ba34b36a81..04833a0c87 100644
--- a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl
@@ -16,7 +16,7 @@
 
 # October 2012.
 #
-# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and
+# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and
 # onward. There are three new instructions used here: umulxhi,
 # addxc[cc] and initializing store. On T3 RSA private key operations
 # are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
index f464368733..d03efcc750 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-gf2m.pl
@@ -46,7 +46,7 @@ require "x86asm.pl";
 $output = pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
 
 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -152,7 +152,7 @@ $R="mm0";
 	 &xor	($a4,$a2);		# a2=a4^a2^a4
 	 &mov	(&DWP(5*4,"esp"),$a1);	# a1^a4
 	 &xor	($a4,$a1);		# a1^a2^a4
-	&sar	(@i[1],31);		# broardcast 30th bit
+	&sar	(@i[1],31);		# broadcast 30th bit
 	&and	($lo,$b);
 	 &mov	(&DWP(6*4,"esp"),$a2);	# a2^a4
 	&and	(@i[1],$b);
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
index f1abcc5b4c..7ba2133ac9 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86-mont.pl
@@ -8,7 +8,7 @@
 
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -39,8 +39,8 @@ require "x86asm.pl";
 
 $output = pop;
 open STDOUT,">$output";
- 
-&asm_init($ARGV[0],$0);
+
+&asm_init($ARGV[0]);
 
 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -78,7 +78,7 @@ $frame=32;				# size of above frame rounded up to 16n
 	&lea	("ebp",&DWP(-$frame,"esp","edi",4));	# future alloca($frame+4*(num+2))
 	&neg	("edi");
 
-	# minimize cache contention by arraning 2K window between stack
+	# minimize cache contention by arranging 2K window between stack
 	# pointer and ap argument [np is also position sensitive vector,
 	# but it's assumed to be near ap, as it's allocated at ~same
 	# time].
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
index 621be33054..31839ba060 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c
@@ -14,7 +14,7 @@
 /*-
  * x86_64 BIGNUM accelerator version 0.1, December 2002.
  *
- * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+ * Implemented by Andy Polyakov <appro@openssl.org> for the OpenSSL
  * project.
  *
  * Rights for redistribution and usage in source and binary forms are
@@ -114,7 +114,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
     BN_ULONG c1 = 0;
 
     if (num <= 0)
-        return (c1);
+        return c1;
 
     while (num & ~3) {
         mul_add(rp[0], ap[0], w, c1);
@@ -136,7 +136,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
         return c1;
     }
 
-    return (c1);
+    return c1;
 }
 
 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -144,7 +144,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
     BN_ULONG c1 = 0;
 
     if (num <= 0)
-        return (c1);
+        return c1;
 
     while (num & ~3) {
         mul(rp[0], ap[0], w, c1);
@@ -164,7 +164,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
             return c1;
         mul(rp[2], ap[2], w, c1);
     }
-    return (c1);
+    return c1;
 }
 
 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -264,7 +264,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
     int c = 0;
 
     if (n <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
     for (;;) {
         t1 = a[0];
@@ -303,7 +303,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
         b += 4;
         r += 4;
     }
-    return (c);
+    return c;
 }
 # endif
 
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
index d962f62033..0fd6e985d7 100644
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -54,7 +54,9 @@ $code.=<<___;
 .type	_mul_1x1,\@abi-omnipotent
 .align	16
 _mul_1x1:
+.cfi_startproc
 	sub	\$128+8,%rsp
+.cfi_adjust_cfa_offset	128+8
 	mov	\$-1,$a1
 	lea	($a,$a),$i0
 	shr	\$3,$a1
@@ -66,7 +68,7 @@ _mul_1x1:
 	sar	\$63,$i0		# broadcast 62nd bit
 	lea	(,$a1,4),$a4
 	and	$b,$a
-	sar	\$63,$i1		# boardcast 61st bit
+	sar	\$63,$i1		# broadcast 61st bit
 	mov	$a,$hi			# $a is $lo
 	shl	\$63,$lo
 	and	$b,$i0
@@ -160,8 +162,10 @@ $code.=<<___;
 	xor	$i1,$hi
 
 	add	\$128+8,%rsp
+.cfi_adjust_cfa_offset	-128-8
 	ret
 .Lend_mul_1x1:
+.cfi_endproc
 .size	_mul_1x1,.-_mul_1x1
 ___
 
@@ -174,8 +178,10 @@ $code.=<<___;
 .type	bn_GF2m_mul_2x2,\@abi-omnipotent
 .align	16
 bn_GF2m_mul_2x2:
-	mov	OPENSSL_ia32cap_P(%rip),%rax
-	bt	\$33,%rax
+.cfi_startproc
+	mov	%rsp,%rax
+	mov	OPENSSL_ia32cap_P(%rip),%r10
+	bt	\$33,%r10
 	jnc	.Lvanilla_mul_2x2
 
 	movq		$a1,%xmm0
@@ -210,6 +216,7 @@ $code.=<<___;
 .align	16
 .Lvanilla_mul_2x2:
 	lea	-8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	8*17
 ___
 $code.=<<___ if ($win64);
 	mov	`8*17+40`(%rsp),$b0
@@ -218,10 +225,15 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%r14,8*10(%rsp)
+.cfi_rel_offset	%r14,8*10
 	mov	%r13,8*11(%rsp)
+.cfi_rel_offset	%r13,8*11
 	mov	%r12,8*12(%rsp)
+.cfi_rel_offset	%r12,8*12
 	mov	%rbp,8*13(%rsp)
+.cfi_rel_offset	%rbp,8*13
 	mov	%rbx,8*14(%rsp)
+.cfi_rel_offset	%rbx,8*14
 .Lbody_mul_2x2:
 	mov	$rp,32(%rsp)		# save the arguments
 	mov	$a1,40(%rsp)
@@ -269,10 +281,15 @@ $code.=<<___;
 	mov	$lo,8(%rbp)
 
 	mov	8*10(%rsp),%r14
+.cfi_restore	%r14
 	mov	8*11(%rsp),%r13
+.cfi_restore	%r13
 	mov	8*12(%rsp),%r12
+.cfi_restore	%r12
 	mov	8*13(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	8*14(%rsp),%rbx
+.cfi_restore	%rbx
 ___
 $code.=<<___ if ($win64);
 	mov	8*15(%rsp),%rdi
@@ -280,8 +297,11 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	lea	8*17(%rsp),%rsp
+.cfi_adjust_cfa_offset	-8*17
+.Lepilogue_mul_2x2:
 	ret
 .Lend_mul_2x2:
+.cfi_endproc
 .size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
 .asciz	"GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 .align	16
@@ -312,13 +332,19 @@ se_handler:
 	pushfq
 	sub	\$64,%rsp
 
-	mov	152($context),%rax	# pull context->Rsp
+	mov	120($context),%rax	# pull context->Rax
 	mov	248($context),%rbx	# pull context->Rip
 
 	lea	.Lbody_mul_2x2(%rip),%r10
 	cmp	%r10,%rbx		# context->Rip<"prologue" label
 	jb	.Lin_prologue
 
+	mov	152($context),%rax	# pull context->Rsp
+
+	lea	.Lepilogue_mul_2x2(%rip),%r10
+	cmp	%r10,%rbx		# context->Rip>="epilogue" label
+	jae	.Lin_prologue
+
 	mov	8*10(%rax),%r14		# mimic epilogue
 	mov	8*11(%rax),%r13
 	mov	8*12(%rax),%r12
@@ -335,8 +361,9 @@ se_handler:
 	mov	%r13,224($context)	# restore context->R13
 	mov	%r14,232($context)	# restore context->R14
 
-.Lin_prologue:
 	lea	8*17(%rax),%rax
+
+.Lin_prologue:
 	mov	%rax,152($context)	# restore context->Rsp
 
 	mov	40($disp),%rdi		# disp->ContextRecord
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
index 8d2fb2cebb..c051135e30 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl
@@ -104,8 +104,10 @@ $code=<<___;
 .type	bn_mul_mont,\@function,6
 .align	16
 bn_mul_mont:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	test	\$3,${num}d
 	jnz	.Lmul_enter
 	cmp	\$8,${num}d
@@ -124,11 +126,17 @@ $code.=<<___;
 .align	16
 .Lmul_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	neg	$num
 	mov	%rsp,%r11
@@ -161,6 +169,7 @@ $code.=<<___;
 .Lmul_page_walk_done:
 
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul_body:
 	mov	$bp,%r12		# reassign $bp
 ___
@@ -309,7 +318,7 @@ $code.=<<___;
 	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[i]
 	mov	8(%rsp,$i,8),%rax	# tp[i+1]
 	lea	1($i),$i		# i++
-	dec	$j			# doesnn't affect CF!
+	dec	$j			# doesn't affect CF!
 	jnz	.Lsub
 
 	sbb	\$0,%rax		# handle upmost overflow bit
@@ -331,16 +340,25 @@ $code.=<<___;
 	jnz	.Lcopy
 
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul_mont,.-bn_mul_mont
 ___
 {{{
@@ -350,8 +368,10 @@ $code.=<<___;
 .type	bn_mul4x_mont,\@function,6
 .align	16
 bn_mul4x_mont:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -361,11 +381,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	neg	$num
 	mov	%rsp,%r11
@@ -389,6 +415,7 @@ $code.=<<___;
 .Lmul4x_page_walk_done:
 
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul4x_body:
 	mov	$rp,16(%rsp,$num,8)	# tp[num+2]=$rp
 	mov	%rdx,%r12		# reassign $bp
@@ -721,7 +748,7 @@ $code.=<<___;
 	mov	56($ap,$i,8),@ri[3]
 	sbb	40($np,$i,8),@ri[1]
 	lea	4($i),$i		# i++
-	dec	$j			# doesnn't affect CF!
+	dec	$j			# doesn't affect CF!
 	jnz	.Lsub4x
 
 	mov	@ri[0],0($rp,$i,8)	# rp[i]=tp[i]-np[i]
@@ -766,16 +793,25 @@ ___
 }
 $code.=<<___;
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi, 8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul4x_mont,.-bn_mul4x_mont
 ___
 }}}
@@ -803,14 +839,22 @@ $code.=<<___;
 .type	bn_sqr8x_mont,\@function,6
 .align	32
 bn_sqr8x_mont:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lsqr8x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lsqr8x_prologue:
 
 	mov	${num}d,%r10d
@@ -866,6 +910,7 @@ bn_sqr8x_mont:
 
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lsqr8x_body:
 
 	movq	$nptr, %xmm2		# save pointer to modulus
@@ -935,6 +980,7 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	jmp	.Lsqr8x_cond_copy
 
 .align	32
@@ -964,14 +1010,22 @@ $code.=<<___;
 
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr8x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_sqr8x_mont,.-bn_sqr8x_mont
 ___
 }}}
@@ -983,14 +1037,22 @@ $code.=<<___;
 .type	bn_mulx4x_mont,\@function,6
 .align	32
 bn_mulx4x_mont:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmulx4x_prologue:
 
 	shl	\$3,${num}d		# convert $num to bytes
@@ -1036,6 +1098,7 @@ bn_mulx4x_mont:
 	mov	$n0, 24(%rsp)		# save *n0
 	mov	$rp, 32(%rsp)		# save $rp
 	mov	%rax,40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 	mov	$num,48(%rsp)		# inner counter
 	jmp	.Lmulx4x_body
 
@@ -1285,6 +1348,7 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	jmp	.Lmulx4x_cond_copy
 
 .align	32
@@ -1314,14 +1378,22 @@ $code.=<<___;
 
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mulx4x_mont,.-bn_mulx4x_mont
 ___
 }}}
@@ -1400,12 +1472,12 @@ sqr_handler:
 
 	mov	0(%r11),%r10d		# HandlerData[0]
 	lea	(%rsi,%r10),%r10	# end of prologue label
-	cmp	%r10,%rbx		# context->Rip<.Lsqr_body
+	cmp	%r10,%rbx		# context->Rip<.Lsqr_prologue
 	jb	.Lcommon_seh_tail
 
 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# body label
-	cmp	%r10,%rbx		# context->Rip>=.Lsqr_epilogue
+	cmp	%r10,%rbx		# context->Rip<.Lsqr_body
 	jb	.Lcommon_pop_regs
 
 	mov	152($context),%rax	# pull context->Rsp
diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
index 97d8eee700..ad6e8ada3c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl
@@ -93,8 +93,10 @@ $code=<<___;
 .type	bn_mul_mont_gather5,\@function,6
 .align	64
 bn_mul_mont_gather5:
+.cfi_startproc
 	mov	${num}d,${num}d
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	test	\$7,${num}d
 	jnz	.Lmul_enter
 ___
@@ -108,11 +110,17 @@ $code.=<<___;
 .Lmul_enter:
 	movd	`($win64?56:8)`(%rsp),%xmm5	# load 7th argument
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	neg	$num
 	mov	%rsp,%r11
@@ -145,6 +153,7 @@ $code.=<<___;
 
 	lea	.Linc(%rip),%r10
 	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
 .Lmul_body:
 
 	lea	128($bp),%r12		# reassign $bp (+size optimization)
@@ -410,7 +419,7 @@ $code.=<<___;
 	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[i]
 	mov	8($ap,$i,8),%rax	# tp[i+1]
 	lea	1($i),$i		# i++
-	dec	$j			# doesnn't affect CF!
+	dec	$j			# doesn't affect CF!
 	jnz	.Lsub
 
 	sbb	\$0,%rax		# handle upmost overflow bit
@@ -432,17 +441,26 @@ $code.=<<___;
 	jnz	.Lcopy
 
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
 ___
 {{{
@@ -452,8 +470,10 @@ $code.=<<___;
 .type	bn_mul4x_mont_gather5,\@function,6
 .align	32
 bn_mul4x_mont_gather5:
+.cfi_startproc
 	.byte	0x67
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -463,11 +483,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmul4x_prologue:
 
 	.byte	0x67
@@ -523,22 +549,32 @@ $code.=<<___;
 	neg	$num
 
 	mov	%rax,40(%rsp)
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmul4x_body:
 
 	call	mul4x_internal
 
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmul4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
 
 .type	mul4x_internal,\@abi-omnipotent
@@ -1050,7 +1086,7 @@ my $bptr="%rdx";	# const void *table,
 my $nptr="%rcx";	# const BN_ULONG *nptr,
 my $n0  ="%r8";		# const BN_ULONG *n0);
 my $num ="%r9";		# int num, has to be divisible by 8
-			# int pwr 
+			# int pwr
 
 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
 my @A0=("%r10","%r11");
@@ -1062,7 +1098,9 @@ $code.=<<___;
 .type	bn_power5,\@function,6
 .align	32
 bn_power5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($addx);
 	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -1072,11 +1110,17 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lpower5_prologue:
 
 	shl	\$3,${num}d		# convert $num to bytes
@@ -1127,7 +1171,7 @@ $code.=<<___;
 	ja	.Lpwr_page_walk
 .Lpwr_page_walk_done:
 
-	mov	$num,%r10	
+	mov	$num,%r10
 	neg	$num
 
 	##############################################################
@@ -1141,6 +1185,7 @@ $code.=<<___;
 	#
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpower5_body:
 	movq	$rptr,%xmm1		# save $rptr, used in sqr8x
 	movq	$nptr,%xmm2		# save $nptr
@@ -1167,16 +1212,25 @@ $code.=<<___;
 	call	mul4x_internal
 
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lpower5_epilogue:
 	ret
+.cfi_endproc
 .size	bn_power5,.-bn_power5
 
 .globl	bn_sqr8x_internal
@@ -2036,7 +2090,7 @@ __bn_post4x_internal:
 	jnz	.Lsqr4x_sub
 
 	mov	$num,%r10		# prepare for back-to-back call
-	neg	$num			# restore $num	
+	neg	$num			# restore $num
 	ret
 .size	__bn_post4x_internal,.-__bn_post4x_internal
 ___
@@ -2056,14 +2110,22 @@ bn_from_montgomery:
 .type	bn_from_mont8x,\@function,6
 .align	32
 bn_from_mont8x:
+.cfi_startproc
 	.byte	0x67
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lfrom_prologue:
 
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2128,6 +2190,7 @@ bn_from_mont8x:
 	#
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lfrom_body:
 	mov	$num,%r11
 	lea	48(%rsp),%rax
@@ -2171,7 +2234,6 @@ $code.=<<___ if ($addx);
 
 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
-	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero
 
 .align	32
@@ -2183,11 +2245,12 @@ $code.=<<___;
 
 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
-	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero
 
 .align	32
 .Lfrom_mont_zero:
+	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	movdqa	%xmm0,16*0(%rax)
 	movdqa	%xmm0,16*1(%rax)
 	movdqa	%xmm0,16*2(%rax)
@@ -2198,14 +2261,22 @@ $code.=<<___;
 
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lfrom_epilogue:
 	ret
+.cfi_endproc
 .size	bn_from_mont8x,.-bn_from_mont8x
 ___
 }
@@ -2218,14 +2289,22 @@ $code.=<<___;
 .type	bn_mulx4x_mont_gather5,\@function,6
 .align	32
 bn_mulx4x_mont_gather5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lmulx4x_prologue:
 
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2259,7 +2338,7 @@ bn_mulx4x_mont_gather5:
 	mov	\$0,%r10
 	cmovc	%r10,%r11
 	sub	%r11,%rbp
-.Lmulx4xsp_done:	
+.Lmulx4xsp_done:
 	and	\$-64,%rbp		# ensure alignment
 	mov	%rsp,%r11
 	sub	%rbp,%r11
@@ -2291,21 +2370,31 @@ bn_mulx4x_mont_gather5:
 	#
 	mov	$n0, 32(%rsp)		# save *n0
 	mov	%rax,40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmulx4x_body:
 	call	mulx4x_internal
 
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
+.cfi_endproc
 .size	bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
 
 .type	mulx4x_internal,\@abi-omnipotent
@@ -2333,7 +2422,7 @@ my $N=$STRIDE/4;		# should match cache line size
 $code.=<<___;
 	movdqa	0(%rax),%xmm0		# 00000001000000010000000000000000
 	movdqa	16(%rax),%xmm1		# 00000002000000020000000200000002
-	lea	88-112(%rsp,%r10),%r10	# place the mask after tp[num+1] (+ICache optimizaton)
+	lea	88-112(%rsp,%r10),%r10	# place the mask after tp[num+1] (+ICache optimization)
 	lea	128($bp),$bptr		# size optimization
 
 	pshufd	\$0,%xmm5,%xmm5		# broadcast index
@@ -2683,14 +2772,22 @@ $code.=<<___;
 .type	bn_powerx5,\@function,6
 .align	32
 bn_powerx5:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 .Lpowerx5_enter:
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lpowerx5_prologue:
 
 	shl	\$3,${num}d		# convert $num to bytes
@@ -2741,7 +2838,7 @@ bn_powerx5:
 	ja	.Lpwrx_page_walk
 .Lpwrx_page_walk_done:
 
-	mov	$num,%r10	
+	mov	$num,%r10
 	neg	$num
 
 	##############################################################
@@ -2762,6 +2859,7 @@ bn_powerx5:
 	movq	$bptr,%xmm4
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
+.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpowerx5_body:
 
 	call	__bn_sqrx8x_internal
@@ -2784,17 +2882,26 @@ bn_powerx5:
 	call	mulx4x_internal
 
 	mov	40(%rsp),%rsi		# restore %rsp
+.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lpowerx5_epilogue:
 	ret
+.cfi_endproc
 .size	bn_powerx5,.-bn_powerx5
 
 .globl	bn_sqrx8x_internal
@@ -3678,8 +3785,8 @@ mul_handler:
 	jb	.Lcommon_seh_tail
 
 	mov	4(%r11),%r10d		# HandlerData[1]
-	lea	(%rsi,%r10),%r10	# epilogue label
-	cmp	%r10,%rbx		# context->Rip>=epilogue label
+	lea	(%rsi,%r10),%r10	# beginning of body label
+	cmp	%r10,%rbx		# context->Rip<body label
 	jb	.Lcommon_pop_regs
 
 	mov	152($context),%rax	# pull context->Rsp
diff --git a/deps/openssl/openssl/crypto/bn/bn_add.c b/deps/openssl/openssl/crypto/bn/bn_add.c
index 7cdefa77a1..f2736b8f6d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_add.c
+++ b/deps/openssl/openssl/crypto/bn/bn_add.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -10,51 +10,69 @@
 #include "internal/cryptlib.h"
 #include "bn_lcl.h"
 
-/* r can == a or b */
+/* signed add of b to a. */
 int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
 {
-    int a_neg = a->neg, ret;
+    int ret, r_neg, cmp_res;
 
     bn_check_top(a);
     bn_check_top(b);
 
-    /*-
-     *  a +  b      a+b
-     *  a + -b      a-b
-     * -a +  b      b-a
-     * -a + -b      -(a+b)
-     */
-    if (a_neg ^ b->neg) {
-        /* only one is negative */
-        if (a_neg) {
-            const BIGNUM *tmp;
-
-            tmp = a;
-            a = b;
-            b = tmp;
+    if (a->neg == b->neg) {
+        r_neg = a->neg;
+        ret = BN_uadd(r, a, b);
+    } else {
+        cmp_res = BN_ucmp(a, b);
+        if (cmp_res > 0) {
+            r_neg = a->neg;
+            ret = BN_usub(r, a, b);
+        } else if (cmp_res < 0) {
+            r_neg = b->neg;
+            ret = BN_usub(r, b, a);
+        } else {
+            r_neg = 0;
+            BN_zero(r);
+            ret = 1;
         }
+    }
+
+    r->neg = r_neg;
+    bn_check_top(r);
+    return ret;
+}
+
+/* signed sub of b from a. */
+int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int ret, r_neg, cmp_res;
 
-        /* we are now a - b */
+    bn_check_top(a);
+    bn_check_top(b);
 
-        if (BN_ucmp(a, b) < 0) {
-            if (!BN_usub(r, b, a))
-                return 0;
-            r->neg = 1;
+    if (a->neg != b->neg) {
+        r_neg = a->neg;
+        ret = BN_uadd(r, a, b);
+    } else {
+        cmp_res = BN_ucmp(a, b);
+        if (cmp_res > 0) {
+            r_neg = a->neg;
+            ret = BN_usub(r, a, b);
+        } else if (cmp_res < 0) {
+            r_neg = !b->neg;
+            ret = BN_usub(r, b, a);
         } else {
-            if (!BN_usub(r, a, b))
-                return 0;
-            r->neg = 0;
+            r_neg = 0;
+            BN_zero(r);
+            ret = 1;
         }
-        return 1;
     }
 
-    ret = BN_uadd(r, a, b);
-    r->neg = a_neg;
+    r->neg = r_neg;
     bn_check_top(r);
     return ret;
 }
 
-/* unsigned add of b to a */
+/* unsigned add of b to a, r can be equal to a or b. */
 int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
 {
     int max, min, dif;
@@ -151,59 +169,3 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
     return 1;
 }
 
-int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
-{
-    int max;
-    int add = 0, neg = 0;
-
-    bn_check_top(a);
-    bn_check_top(b);
-
-    /*-
-     *  a -  b      a-b
-     *  a - -b      a+b
-     * -a -  b      -(a+b)
-     * -a - -b      b-a
-     */
-    if (a->neg) {
-        if (b->neg) {
-            const BIGNUM *tmp;
-
-            tmp = a;
-            a = b;
-            b = tmp;
-        } else {
-            add = 1;
-            neg = 1;
-        }
-    } else {
-        if (b->neg) {
-            add = 1;
-            neg = 0;
-        }
-    }
-
-    if (add) {
-        if (!BN_uadd(r, a, b))
-            return 0;
-        r->neg = neg;
-        return 1;
-    }
-
-    /* We are actually doing a - b :-) */
-
-    max = (a->top > b->top) ? a->top : b->top;
-    if (bn_wexpand(r, max) == NULL)
-        return 0;
-    if (BN_ucmp(a, b) < 0) {
-        if (!BN_usub(r, b, a))
-            return 0;
-        r->neg = 1;
-    } else {
-        if (!BN_usub(r, a, b))
-            return 0;
-        r->neg = 0;
-    }
-    bn_check_top(r);
-    return 1;
-}
diff --git a/deps/openssl/openssl/crypto/bn/bn_asm.c b/deps/openssl/openssl/crypto/bn/bn_asm.c
index 39c6c2134b..729b2480ac 100644
--- a/deps/openssl/openssl/crypto/bn/bn_asm.c
+++ b/deps/openssl/openssl/crypto/bn/bn_asm.c
@@ -21,7 +21,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
 
     assert(num >= 0);
     if (num <= 0)
-        return (c1);
+        return c1;
 
 # ifndef OPENSSL_SMALL_FOOTPRINT
     while (num & ~3) {
@@ -41,7 +41,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
         num--;
     }
 
-    return (c1);
+    return c1;
 }
 
 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -50,7 +50,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
 
     assert(num >= 0);
     if (num <= 0)
-        return (c1);
+        return c1;
 
 # ifndef OPENSSL_SMALL_FOOTPRINT
     while (num & ~3) {
@@ -69,7 +69,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
         rp++;
         num--;
     }
-    return (c1);
+    return c1;
 }
 
 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -108,7 +108,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
 
     assert(num >= 0);
     if (num <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
     bl = LBITS(w);
     bh = HBITS(w);
@@ -130,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
         rp++;
         num--;
     }
-    return (c);
+    return c;
 }
 
 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
@@ -140,7 +140,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
 
     assert(num >= 0);
     if (num <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
     bl = LBITS(w);
     bh = HBITS(w);
@@ -162,7 +162,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
         rp++;
         num--;
     }
-    return (carry);
+    return carry;
 }
 
 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
@@ -210,7 +210,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
     int i, count = 2;
 
     if (d == 0)
-        return (BN_MASK2);
+        return BN_MASK2;
 
     i = BN_num_bits_word(d);
     assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
@@ -264,7 +264,7 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
         l = (l & BN_MASK2l) << BN_BITS4;
     }
     ret |= q;
-    return (ret);
+    return ret;
 }
 #endif                          /* !defined(BN_LLONG) && defined(BN_DIV2W) */
 
@@ -276,7 +276,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 
     assert(n >= 0);
     if (n <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
 # ifndef OPENSSL_SMALL_FOOTPRINT
     while (n & ~3) {
@@ -307,7 +307,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         r++;
         n--;
     }
-    return ((BN_ULONG)ll);
+    return (BN_ULONG)ll;
 }
 #else                           /* !BN_LLONG */
 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
@@ -317,7 +317,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 
     assert(n >= 0);
     if (n <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
     c = 0;
 # ifndef OPENSSL_SMALL_FOOTPRINT
@@ -364,7 +364,7 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         r++;
         n--;
     }
-    return ((BN_ULONG)c);
+    return (BN_ULONG)c;
 }
 #endif                          /* !BN_LLONG */
 
@@ -376,7 +376,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 
     assert(n >= 0);
     if (n <= 0)
-        return ((BN_ULONG)0);
+        return (BN_ULONG)0;
 
 #ifndef OPENSSL_SMALL_FOOTPRINT
     while (n & ~3) {
@@ -417,7 +417,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         r++;
         n--;
     }
-    return (c);
+    return c;
 }
 
 #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
diff --git a/deps/openssl/openssl/crypto/bn/bn_blind.c b/deps/openssl/openssl/crypto/bn/bn_blind.c
index 9474e21e4c..450cdfb348 100644
--- a/deps/openssl/openssl/crypto/bn/bn_blind.c
+++ b/deps/openssl/openssl/crypto/bn/bn_blind.c
@@ -82,7 +82,6 @@ void BN_BLINDING_free(BN_BLINDING *r)
 {
     if (r == NULL)
         return;
-
     BN_free(r->A);
     BN_free(r->Ai);
     BN_free(r->e);
@@ -124,7 +123,7 @@ int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
  err:
     if (b->counter == BN_BLINDING_COUNTER)
         b->counter = 0;
-    return (ret);
+    return ret;
 }
 
 int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
@@ -140,14 +139,14 @@ int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
 
     if ((b->A == NULL) || (b->Ai == NULL)) {
         BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED);
-        return (0);
+        return 0;
     }
 
     if (b->counter == -1)
         /* Fresh blinding, doesn't need updating. */
         b->counter = 0;
     else if (!BN_BLINDING_update(b, ctx))
-        return (0);
+        return 0;
 
     if (r != NULL && (BN_copy(r, b->Ai) == NULL))
         return 0;
@@ -198,7 +197,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
     }
 
     bn_check_top(n);
-    return (ret);
+    return ret;
 }
 
 int BN_BLINDING_is_current_thread(BN_BLINDING *b)
@@ -271,7 +270,7 @@ BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
 
     do {
         int rv;
-        if (!BN_rand_range(ret->A, ret->mod))
+        if (!BN_priv_rand_range(ret->A, ret->mod))
             goto err;
         if (int_bn_mod_inverse(ret->Ai, ret->A, ret->mod, ctx, &rv))
             break;
diff --git a/deps/openssl/openssl/crypto/bn/bn_ctx.c b/deps/openssl/openssl/crypto/bn/bn_ctx.c
index 68c0468743..aa08b31a34 100644
--- a/deps/openssl/openssl/crypto/bn/bn_ctx.c
+++ b/deps/openssl/openssl/crypto/bn/bn_ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -255,9 +255,12 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx)
         /* Need to expand */
         unsigned int newsize =
             st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES;
-        unsigned int *newitems = OPENSSL_malloc(sizeof(*newitems) * newsize);
-        if (newitems == NULL)
+        unsigned int *newitems;
+        
+        if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) {
+            BNerr(BN_F_BN_STACK_PUSH, ERR_R_MALLOC_FAILURE);
             return 0;
+        }
         if (st->depth)
             memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth);
         OPENSSL_free(st->indexes);
@@ -306,9 +309,12 @@ static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
 
     /* Full; allocate a new pool item and link it in. */
     if (p->used == p->size) {
-        BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(*item));
-        if (item == NULL)
+        BN_POOL_ITEM *item;
+        
+        if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) {
+            BNerr(BN_F_BN_POOL_GET, ERR_R_MALLOC_FAILURE);
             return NULL;
+        }
         for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) {
             bn_init(bn);
             if ((flag & BN_FLG_SECURE) != 0)
diff --git a/deps/openssl/openssl/crypto/bn/bn_dh.c b/deps/openssl/openssl/crypto/bn/bn_dh.c
index 17d05597b3..38acdee234 100644
--- a/deps/openssl/openssl/crypto/bn/bn_dh.c
+++ b/deps/openssl/openssl/crypto/bn/bn_dh.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -8,7 +8,7 @@
  */
 
 #include "bn_lcl.h"
-#include "e_os.h"
+#include "internal/nelem.h"
 
 #ifndef OPENSSL_NO_DH
 #include <openssl/dh.h>
@@ -104,6 +104,146 @@ static const BN_ULONG dh2048_256_q[] = {
     0x8CF83642A709A097ULL
 };
 
+/* Primes from RFC 7919 */
+static const BN_ULONG ffdhe2048_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0x886B423861285C97ULL, 0xC6F34A26C1B2EFFAULL,
+    0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+    0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+    0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+    0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+    0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+    0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+    0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+    0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+    0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+    0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0x25E41D2B66C62E37ULL, 0x3C1B20EE3FD59D7CULL,
+    0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+    0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+    0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+    0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+    0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+    0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+    0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+    0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+    0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+    0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+    0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+    0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+    0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+    0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+    0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xC68A007E5E655F6AULL, 0x4DB5A851F44182E1ULL,
+    0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL,
+    0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL,
+    0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL,
+    0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL,
+    0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL,
+    0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL,
+    0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL,
+    0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL,
+    0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL,
+    0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL,
+    0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL,
+    0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL,
+    0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL,
+    0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL,
+    0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL,
+    0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL,
+    0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL,
+    0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL,
+    0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL,
+    0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL,
+    0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xA40E329CD0E40E65ULL, 0xA41D570D7938DAD4ULL,
+    0x62A69526D43161C1ULL, 0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL,
+    0xEC9D1810C6272B04ULL, 0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL,
+    0x505DC82DB854338AULL, 0x62292C311562A846ULL, 0xD72B03746AE77F5EULL,
+    0xF9C9091B462D538CULL, 0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL,
+    0xEEAAC0232A281BF6ULL, 0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL,
+    0x587E38DA0077D9B4ULL, 0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL,
+    0xA00EF092350511E3ULL, 0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL,
+    0x388147FB4CFDB477ULL, 0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL,
+    0xB38E8C334C701C3AULL, 0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL,
+    0x23BA4442CAF53EA6ULL, 0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL,
+    0xC68A007E5E0DD902ULL, 0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL,
+    0x0A8291CDCEC97DCFULL, 0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL,
+    0x092999A333CB8B7AULL, 0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL,
+    0xED6A1E012D9E6832ULL, 0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL,
+    0x7763CF1D55034004ULL, 0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL,
+    0x6E6F52C3164DF4FBULL, 0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL,
+    0x0ABCD06BFA53DDEFULL, 0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL,
+    0xE86D2BC522363A0DULL, 0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL,
+    0xF4FD4452E2D74DD3ULL, 0xB4130C93BC437944ULL, 0xAEFE130985139270ULL,
+    0x598CB0FAC186D91CULL, 0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL,
+    0xBC34F4DEF99C0238ULL, 0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL,
+    0xC6F34A26C1B2EFFAULL, 0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL,
+    0xC3FE3B1B4C6FAD73ULL, 0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL,
+    0xC03404CD28342F61ULL, 0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL,
+    0xAE56EDE76372BB19ULL, 0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL,
+    0xD108A94BB2C8E3FBULL, 0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL,
+    0x1DF158A136ADE735ULL, 0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL,
+    0xB557135E7F57C935ULL, 0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL,
+    0xD3DF1ED5D5FD6561ULL, 0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL,
+    0xCC939DCE249B3EF9ULL, 0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL,
+    0xAFDC5620273D3CF1ULL, 0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+    0xFFFFFFFFFFFFFFFFULL, 0xD68C8BB7C5C6424CULL, 0x011E2A94838FF88CULL,
+    0x0822E506A9F4614EULL, 0x97D11D49F7A8443DULL, 0xA6BBFDE530677F0DULL,
+    0x2F741EF8C1FE86FEULL, 0xFAFABE1C5D71A87EULL, 0xDED2FBABFBE58A30ULL,
+    0xB6855DFE72B0A66EULL, 0x1EFC8CE0BA8A4FE8ULL, 0x83F81D4A3F2FA457ULL,
+    0xA1FE3075A577E231ULL, 0xD5B8019488D9C0A0ULL, 0x624816CDAD9A95F9ULL,
+    0x99E9E31650C1217BULL, 0x51AA691E0E423CFCULL, 0x1C217E6C3826E52CULL,
+    0x51A8A93109703FEEULL, 0xBB7099876A460E74ULL, 0x541FC68C9C86B022ULL,
+    0x59160CC046FD8251ULL, 0x2846C0BA35C35F5CULL, 0x54504AC78B758282ULL,
+    0x29388839D2AF05E4ULL, 0xCB2C0F1CC01BD702ULL, 0x555B2F747C932665ULL,
+    0x86B63142A3AB8829ULL, 0x0B8CC3BDF64B10EFULL, 0x687FEB69EDD1CC5EULL,
+    0xFDB23FCEC9509D43ULL, 0x1E425A31D951AE64ULL, 0x36AD004CF600C838ULL,
+    0xA40E329CCFF46AAAULL, 0xA41D570D7938DAD4ULL, 0x62A69526D43161C1ULL,
+    0x3FDD4A8E9ADB1E69ULL, 0x5B3B71F9DC6B80D6ULL, 0xEC9D1810C6272B04ULL,
+    0x8CCF2DD5CACEF403ULL, 0xE49F5235C95B9117ULL, 0x505DC82DB854338AULL,
+    0x62292C311562A846ULL, 0xD72B03746AE77F5EULL, 0xF9C9091B462D538CULL,
+    0x0AE8DB5847A67CBEULL, 0xB3A739C122611682ULL, 0xEEAAC0232A281BF6ULL,
+    0x94C6651E77CAF992ULL, 0x763E4E4B94B2BBC1ULL, 0x587E38DA0077D9B4ULL,
+    0x7FB29F8C183023C3ULL, 0x0ABEC1FFF9E3A26EULL, 0xA00EF092350511E3ULL,
+    0xB855322EDB6340D8ULL, 0xA52471F7A9A96910ULL, 0x388147FB4CFDB477ULL,
+    0x9B1F5C3E4E46041FULL, 0xCDAD0657FCCFEC71ULL, 0xB38E8C334C701C3AULL,
+    0x917BDD64B1C0FD4CULL, 0x3BB454329B7624C8ULL, 0x23BA4442CAF53EA6ULL,
+    0x4E677D2C38532A3AULL, 0x0BFD64B645036C7AULL, 0xC68A007E5E0DD902ULL,
+    0x4DB5A851F44182E1ULL, 0x8EC9B55A7F88A46BULL, 0x0A8291CDCEC97DCFULL,
+    0x2A4ECEA9F98D0ACCULL, 0x1A1DB93D7140003CULL, 0x092999A333CB8B7AULL,
+    0x6DC778F971AD0038ULL, 0xA907600A918130C4ULL, 0xED6A1E012D9E6832ULL,
+    0x7135C886EFB4318AULL, 0x87F55BA57E31CC7AULL, 0x7763CF1D55034004ULL,
+    0xAC7D5F42D69F6D18ULL, 0x7930E9E4E58857B6ULL, 0x6E6F52C3164DF4FBULL,
+    0x25E41D2B669E1EF1ULL, 0x3C1B20EE3FD59D7CULL, 0x0ABCD06BFA53DDEFULL,
+    0x1DBF9A42D5C4484EULL, 0xABC521979B0DEADAULL, 0xE86D2BC522363A0DULL,
+    0x5CAE82AB9C9DF69EULL, 0x64F2E21E71F54BFFULL, 0xF4FD4452E2D74DD3ULL,
+    0xB4130C93BC437944ULL, 0xAEFE130985139270ULL, 0x598CB0FAC186D91CULL,
+    0x7AD91D2691F7F7EEULL, 0x61B46FC9D6E6C907ULL, 0xBC34F4DEF99C0238ULL,
+    0xDE355B3B6519035BULL, 0x886B4238611FCFDCULL, 0xC6F34A26C1B2EFFAULL,
+    0xC58EF1837D1683B2ULL, 0x3BB5FCBC2EC22005ULL, 0xC3FE3B1B4C6FAD73ULL,
+    0x8E4F1232EEF28183ULL, 0x9172FE9CE98583FFULL, 0xC03404CD28342F61ULL,
+    0x9E02FCE1CDF7E2ECULL, 0x0B07A7C8EE0A6D70ULL, 0xAE56EDE76372BB19ULL,
+    0x1D4F42A3DE394DF4ULL, 0xB96ADAB760D7F468ULL, 0xD108A94BB2C8E3FBULL,
+    0xBC0AB182B324FB61ULL, 0x30ACCA4F483A797AULL, 0x1DF158A136ADE735ULL,
+    0xE2A689DAF3EFE872ULL, 0x984F0C70E0E68B77ULL, 0xB557135E7F57C935ULL,
+    0x856365553DED1AF3ULL, 0x2433F51F5F066ED0ULL, 0xD3DF1ED5D5FD6561ULL,
+    0xF681B202AEC4617AULL, 0x7D2FE363630C75D8ULL, 0xCC939DCE249B3EF9ULL,
+    0xA9E13641146433FBULL, 0xD8B9C583CE2D3695ULL, 0xAFDC5620273D3CF1ULL,
+    0xADF85458A2BB4A9AULL, 0xFFFFFFFFFFFFFFFFULL
+};
+
 # elif BN_BITS2 == 32
 
 static const BN_ULONG dh1024_160_p[] = {
@@ -194,6 +334,147 @@ static const BN_ULONG dh2048_256_q[] = {
     0xA709A097, 0x8CF83642
 };
 
+/* Primes from RFC 7919 */
+
+static const BN_ULONG ffdhe2048_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x61285C97, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+    0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+    0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+    0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+    0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+    0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+    0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+    0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+    0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+    0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+    0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe3072_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x66C62E37, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+    0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+    0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+    0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+    0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+    0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+    0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+    0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+    0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+    0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+    0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+    0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+    0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+    0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+    0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+    0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe4096_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0x5E655F6A, 0xC68A007E, 0xF44182E1, 0x4DB5A851,
+    0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9,
+    0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9,
+    0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886,
+    0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42,
+    0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B,
+    0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42,
+    0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB,
+    0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93,
+    0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26,
+    0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B,
+    0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183,
+    0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232,
+    0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1,
+    0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3,
+    0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182,
+    0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA,
+    0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555,
+    0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202,
+    0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641,
+    0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458,
+    0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe6144_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xD0E40E65, 0xA40E329C, 0x7938DAD4, 0xA41D570D,
+    0xD43161C1, 0x62A69526, 0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9,
+    0xC6272B04, 0xEC9D1810, 0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235,
+    0xB854338A, 0x505DC82D, 0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374,
+    0x462D538C, 0xF9C9091B, 0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1,
+    0x2A281BF6, 0xEEAAC023, 0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B,
+    0x0077D9B4, 0x587E38DA, 0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF,
+    0x350511E3, 0xA00EF092, 0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7,
+    0x4CFDB477, 0x388147FB, 0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657,
+    0x4C701C3A, 0xB38E8C33, 0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432,
+    0xCAF53EA6, 0x23BA4442, 0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6,
+    0x5E0DD902, 0xC68A007E, 0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A,
+    0xCEC97DCF, 0x0A8291CD, 0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D,
+    0x33CB8B7A, 0x092999A3, 0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A,
+    0x2D9E6832, 0xED6A1E01, 0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5,
+    0x55034004, 0x7763CF1D, 0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4,
+    0x164DF4FB, 0x6E6F52C3, 0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE,
+    0xFA53DDEF, 0x0ABCD06B, 0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197,
+    0x22363A0D, 0xE86D2BC5, 0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E,
+    0xE2D74DD3, 0xF4FD4452, 0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309,
+    0xC186D91C, 0x598CB0FA, 0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9,
+    0xF99C0238, 0xBC34F4DE, 0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238,
+    0xC1B2EFFA, 0xC6F34A26, 0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC,
+    0x4C6FAD73, 0xC3FE3B1B, 0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C,
+    0x28342F61, 0xC03404CD, 0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8,
+    0x6372BB19, 0xAE56EDE7, 0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7,
+    0xB2C8E3FB, 0xD108A94B, 0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F,
+    0x36ADE735, 0x1DF158A1, 0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70,
+    0x7F57C935, 0xB557135E, 0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F,
+    0xD5FD6561, 0xD3DF1ED5, 0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363,
+    0x249B3EF9, 0xCC939DCE, 0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583,
+    0x273D3CF1, 0xAFDC5620, 0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
+static const BN_ULONG ffdhe8192_p[] = {
+    0xFFFFFFFF, 0xFFFFFFFF, 0xC5C6424C, 0xD68C8BB7, 0x838FF88C, 0x011E2A94,
+    0xA9F4614E, 0x0822E506, 0xF7A8443D, 0x97D11D49, 0x30677F0D, 0xA6BBFDE5,
+    0xC1FE86FE, 0x2F741EF8, 0x5D71A87E, 0xFAFABE1C, 0xFBE58A30, 0xDED2FBAB,
+    0x72B0A66E, 0xB6855DFE, 0xBA8A4FE8, 0x1EFC8CE0, 0x3F2FA457, 0x83F81D4A,
+    0xA577E231, 0xA1FE3075, 0x88D9C0A0, 0xD5B80194, 0xAD9A95F9, 0x624816CD,
+    0x50C1217B, 0x99E9E316, 0x0E423CFC, 0x51AA691E, 0x3826E52C, 0x1C217E6C,
+    0x09703FEE, 0x51A8A931, 0x6A460E74, 0xBB709987, 0x9C86B022, 0x541FC68C,
+    0x46FD8251, 0x59160CC0, 0x35C35F5C, 0x2846C0BA, 0x8B758282, 0x54504AC7,
+    0xD2AF05E4, 0x29388839, 0xC01BD702, 0xCB2C0F1C, 0x7C932665, 0x555B2F74,
+    0xA3AB8829, 0x86B63142, 0xF64B10EF, 0x0B8CC3BD, 0xEDD1CC5E, 0x687FEB69,
+    0xC9509D43, 0xFDB23FCE, 0xD951AE64, 0x1E425A31, 0xF600C838, 0x36AD004C,
+    0xCFF46AAA, 0xA40E329C, 0x7938DAD4, 0xA41D570D, 0xD43161C1, 0x62A69526,
+    0x9ADB1E69, 0x3FDD4A8E, 0xDC6B80D6, 0x5B3B71F9, 0xC6272B04, 0xEC9D1810,
+    0xCACEF403, 0x8CCF2DD5, 0xC95B9117, 0xE49F5235, 0xB854338A, 0x505DC82D,
+    0x1562A846, 0x62292C31, 0x6AE77F5E, 0xD72B0374, 0x462D538C, 0xF9C9091B,
+    0x47A67CBE, 0x0AE8DB58, 0x22611682, 0xB3A739C1, 0x2A281BF6, 0xEEAAC023,
+    0x77CAF992, 0x94C6651E, 0x94B2BBC1, 0x763E4E4B, 0x0077D9B4, 0x587E38DA,
+    0x183023C3, 0x7FB29F8C, 0xF9E3A26E, 0x0ABEC1FF, 0x350511E3, 0xA00EF092,
+    0xDB6340D8, 0xB855322E, 0xA9A96910, 0xA52471F7, 0x4CFDB477, 0x388147FB,
+    0x4E46041F, 0x9B1F5C3E, 0xFCCFEC71, 0xCDAD0657, 0x4C701C3A, 0xB38E8C33,
+    0xB1C0FD4C, 0x917BDD64, 0x9B7624C8, 0x3BB45432, 0xCAF53EA6, 0x23BA4442,
+    0x38532A3A, 0x4E677D2C, 0x45036C7A, 0x0BFD64B6, 0x5E0DD902, 0xC68A007E,
+    0xF44182E1, 0x4DB5A851, 0x7F88A46B, 0x8EC9B55A, 0xCEC97DCF, 0x0A8291CD,
+    0xF98D0ACC, 0x2A4ECEA9, 0x7140003C, 0x1A1DB93D, 0x33CB8B7A, 0x092999A3,
+    0x71AD0038, 0x6DC778F9, 0x918130C4, 0xA907600A, 0x2D9E6832, 0xED6A1E01,
+    0xEFB4318A, 0x7135C886, 0x7E31CC7A, 0x87F55BA5, 0x55034004, 0x7763CF1D,
+    0xD69F6D18, 0xAC7D5F42, 0xE58857B6, 0x7930E9E4, 0x164DF4FB, 0x6E6F52C3,
+    0x669E1EF1, 0x25E41D2B, 0x3FD59D7C, 0x3C1B20EE, 0xFA53DDEF, 0x0ABCD06B,
+    0xD5C4484E, 0x1DBF9A42, 0x9B0DEADA, 0xABC52197, 0x22363A0D, 0xE86D2BC5,
+    0x9C9DF69E, 0x5CAE82AB, 0x71F54BFF, 0x64F2E21E, 0xE2D74DD3, 0xF4FD4452,
+    0xBC437944, 0xB4130C93, 0x85139270, 0xAEFE1309, 0xC186D91C, 0x598CB0FA,
+    0x91F7F7EE, 0x7AD91D26, 0xD6E6C907, 0x61B46FC9, 0xF99C0238, 0xBC34F4DE,
+    0x6519035B, 0xDE355B3B, 0x611FCFDC, 0x886B4238, 0xC1B2EFFA, 0xC6F34A26,
+    0x7D1683B2, 0xC58EF183, 0x2EC22005, 0x3BB5FCBC, 0x4C6FAD73, 0xC3FE3B1B,
+    0xEEF28183, 0x8E4F1232, 0xE98583FF, 0x9172FE9C, 0x28342F61, 0xC03404CD,
+    0xCDF7E2EC, 0x9E02FCE1, 0xEE0A6D70, 0x0B07A7C8, 0x6372BB19, 0xAE56EDE7,
+    0xDE394DF4, 0x1D4F42A3, 0x60D7F468, 0xB96ADAB7, 0xB2C8E3FB, 0xD108A94B,
+    0xB324FB61, 0xBC0AB182, 0x483A797A, 0x30ACCA4F, 0x36ADE735, 0x1DF158A1,
+    0xF3EFE872, 0xE2A689DA, 0xE0E68B77, 0x984F0C70, 0x7F57C935, 0xB557135E,
+    0x3DED1AF3, 0x85636555, 0x5F066ED0, 0x2433F51F, 0xD5FD6561, 0xD3DF1ED5,
+    0xAEC4617A, 0xF681B202, 0x630C75D8, 0x7D2FE363, 0x249B3EF9, 0xCC939DCE,
+    0x146433FB, 0xA9E13641, 0xCE2D3695, 0xD8B9C583, 0x273D3CF1, 0xAFDC5620,
+    0xA2BB4A9A, 0xADF85458, 0xFFFFFFFF, 0xFFFFFFFF
+};
+
 # else
 #  error "unsupported BN_BITS2"
 # endif
@@ -206,6 +487,10 @@ static const BN_ULONG dh2048_256_q[] = {
                         OSSL_NELEM(x),\
                         0, BN_FLG_STATIC_DATA };
 
+static const BN_ULONG value_2 = 2;
+
+const BIGNUM _bignum_const_2 =
+    { (BN_ULONG *)&value_2, 1, 1, 0, BN_FLG_STATIC_DATA };
 
 make_dh_bn(dh1024_160_p)
 make_dh_bn(dh1024_160_g)
@@ -217,4 +502,11 @@ make_dh_bn(dh2048_256_p)
 make_dh_bn(dh2048_256_g)
 make_dh_bn(dh2048_256_q)
 
+make_dh_bn(ffdhe2048_p)
+make_dh_bn(ffdhe3072_p)
+make_dh_bn(ffdhe4096_p)
+make_dh_bn(ffdhe6144_p)
+make_dh_bn(ffdhe8192_p)
+
+
 #endif
diff --git a/deps/openssl/openssl/crypto/bn/bn_div.c b/deps/openssl/openssl/crypto/bn/bn_div.c
index 884ff29917..70add10c7d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_div.c
+++ b/deps/openssl/openssl/crypto/bn/bn_div.c
@@ -24,17 +24,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
     bn_check_top(d);
     if (BN_is_zero(d)) {
         BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
-        return (0);
+        return 0;
     }
 
     if (BN_ucmp(m, d) < 0) {
         if (rem != NULL) {
             if (BN_copy(rem, m) == NULL)
-                return (0);
+                return 0;
         }
         if (dv != NULL)
             BN_zero(dv);
-        return (1);
+        return 1;
     }
 
     BN_CTX_start(ctx);
@@ -81,7 +81,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
     ret = 1;
  end:
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 #else
@@ -97,8 +97,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
     *   understand why...);
     * - divl doesn't only calculate quotient, but also leaves
     *   remainder in %edx which we can definitely use here:-)
-    *
-    *                                   <appro@fy.chalmers.se>
     */
 #    undef bn_div_words
 #    define bn_div_words(n0,n1,d0)                \
@@ -113,7 +111,6 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
 #   elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
    /*
     * Same story here, but it's 128-bit by 64-bit division. Wow!
-    *                                   <appro@fy.chalmers.se>
     */
 #    undef bn_div_words
 #    define bn_div_words(n0,n1,d0)                \
@@ -177,28 +174,25 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
 
     if (BN_is_zero(divisor)) {
         BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
-        return (0);
+        return 0;
     }
 
     if (!no_branch && BN_ucmp(num, divisor) < 0) {
         if (rm != NULL) {
             if (BN_copy(rm, num) == NULL)
-                return (0);
+                return 0;
         }
         if (dv != NULL)
             BN_zero(dv);
-        return (1);
+        return 1;
     }
 
     BN_CTX_start(ctx);
+    res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
     tmp = BN_CTX_get(ctx);
     snum = BN_CTX_get(ctx);
     sdiv = BN_CTX_get(ctx);
-    if (dv == NULL)
-        res = BN_CTX_get(ctx);
-    else
-        res = dv;
-    if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
+    if (sdiv == NULL)
         goto err;
 
     /* First we normalise the numbers */
@@ -415,10 +409,10 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
     if (no_branch)
         bn_correct_top(res);
     BN_CTX_end(ctx);
-    return (1);
+    return 1;
  err:
     bn_check_top(rm);
     BN_CTX_end(ctx);
-    return (0);
+    return 0;
 }
 #endif
diff --git a/deps/openssl/openssl/crypto/bn/bn_err.c b/deps/openssl/openssl/crypto/bn/bn_err.c
index 5fe9db9ede..dd87c152cf 100644
--- a/deps/openssl/openssl/crypto/bn/bn_err.c
+++ b/deps/openssl/openssl/crypto/bn/bn_err.c
@@ -1,6 +1,6 @@
 /*
  * Generated by util/mkerr.pl DO NOT EDIT
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -8,87 +8,99 @@
  * https://www.openssl.org/source/license.html
  */
 
-#include <stdio.h>
 #include <openssl/err.h>
-#include <openssl/bn.h>
+#include <openssl/bnerr.h>
 
-/* BEGIN ERROR CODES */
 #ifndef OPENSSL_NO_ERR
 
-# define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
-# define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
-
-static ERR_STRING_DATA BN_str_functs[] = {
-    {ERR_FUNC(BN_F_BNRAND), "bnrand"},
-    {ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
-    {ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
-    {ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
-    {ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
-    {ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
-    {ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
-    {ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
-    {ERR_FUNC(BN_F_BN_COMPUTE_WNAF), "bn_compute_wNAF"},
-    {ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
-    {ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
-    {ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
-    {ERR_FUNC(BN_F_BN_DIV), "BN_div"},
-    {ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
-    {ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
-    {ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "bn_expand_internal"},
-    {ERR_FUNC(BN_F_BN_GENCB_NEW), "BN_GENCB_new"},
-    {ERR_FUNC(BN_F_BN_GENERATE_DSA_NONCE), "BN_generate_dsa_nonce"},
-    {ERR_FUNC(BN_F_BN_GENERATE_PRIME_EX), "BN_generate_prime_ex"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
-    {ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
-    {ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
-    {ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
-    {ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
-    {ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
-    {ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
-    {ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
-    {ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
-    {ERR_FUNC(BN_F_BN_NEW), "BN_new"},
-    {ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
-    {ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
-    {ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"},
-    {ERR_FUNC(BN_F_BN_SET_WORDS), "bn_set_words"},
-    {ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
+static const ERR_STRING_DATA BN_str_functs[] = {
+    {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND, 0), "bnrand"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BNRAND_RANGE, 0), "bnrand_range"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CONVERT_EX, 0),
+     "BN_BLINDING_convert_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_CREATE_PARAM, 0),
+     "BN_BLINDING_create_param"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_INVERT_EX, 0),
+     "BN_BLINDING_invert_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_NEW, 0), "BN_BLINDING_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BLINDING_UPDATE, 0), "BN_BLINDING_update"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2DEC, 0), "BN_bn2dec"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_BN2HEX, 0), "BN_bn2hex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_COMPUTE_WNAF, 0), "bn_compute_wNAF"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_GET, 0), "BN_CTX_get"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_NEW, 0), "BN_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_CTX_START, 0), "BN_CTX_start"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV, 0), "BN_div"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_DIV_RECP, 0), "BN_div_recp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXP, 0), "BN_exp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_EXPAND_INTERNAL, 0), "bn_expand_internal"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENCB_NEW, 0), "BN_GENCB_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_DSA_NONCE, 0),
+     "BN_generate_dsa_nonce"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GENERATE_PRIME_EX, 0),
+     "BN_generate_prime_ex"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD, 0), "BN_GF2m_mod"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_EXP, 0), "BN_GF2m_mod_exp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_MUL, 0), "BN_GF2m_mod_mul"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD, 0),
+     "BN_GF2m_mod_solve_quad"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 0),
+     "BN_GF2m_mod_solve_quad_arr"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQR, 0), "BN_GF2m_mod_sqr"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_GF2M_MOD_SQRT, 0), "BN_GF2m_mod_sqrt"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_LSHIFT, 0), "BN_lshift"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP2_MONT, 0), "BN_mod_exp2_mont"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT, 0), "BN_mod_exp_mont"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_CONSTTIME, 0),
+     "BN_mod_exp_mont_consttime"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_MONT_WORD, 0),
+     "BN_mod_exp_mont_word"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_RECP, 0), "BN_mod_exp_recp"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_EXP_SIMPLE, 0), "BN_mod_exp_simple"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE, 0), "BN_mod_inverse"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_INVERSE_NO_BRANCH, 0),
+     "BN_mod_inverse_no_branch"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_LSHIFT_QUICK, 0), "BN_mod_lshift_quick"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MOD_SQRT, 0), "BN_mod_sqrt"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MONT_CTX_NEW, 0), "BN_MONT_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_MPI2BN, 0), "BN_mpi2bn"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_NEW, 0), "BN_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_POOL_GET, 0), "BN_POOL_get"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND, 0), "BN_rand"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RAND_RANGE, 0), "BN_rand_range"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RECP_CTX_NEW, 0), "BN_RECP_CTX_new"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_RSHIFT, 0), "BN_rshift"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
     {0, NULL}
 };
 
-static ERR_STRING_DATA BN_str_reasons[] = {
-    {ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
-    {ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"},
-    {ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"},
-    {ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"},
-    {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"},
-    {ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"},
-    {ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"},
-    {ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
-     "expand on static bignum data"},
-    {ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"},
-    {ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"},
-    {ERR_REASON(BN_R_INVALID_RANGE), "invalid range"},
-    {ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"},
-    {ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"},
-    {ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"},
-    {ERR_REASON(BN_R_NO_INVERSE), "no inverse"},
-    {ERR_REASON(BN_R_NO_SOLUTION), "no solution"},
-    {ERR_REASON(BN_R_PRIVATE_KEY_TOO_LARGE), "private key too large"},
-    {ERR_REASON(BN_R_P_IS_NOT_PRIME), "p is not prime"},
-    {ERR_REASON(BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
-    {ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),
-     "too many temporary variables"},
+static const ERR_STRING_DATA BN_str_reasons[] = {
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BAD_RECIPROCAL), "bad reciprocal"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BIGNUM_TOO_LONG), "bignum too long"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_BITS_TOO_SMALL), "bits too small"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_CALLED_WITH_EVEN_MODULUS),
+    "called with even modulus"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_DIV_BY_ZERO), "div by zero"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_ENCODING_ERROR), "encoding error"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
+    "expand on static bignum data"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INPUT_NOT_REDUCED), "input not reduced"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_LENGTH), "invalid length"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_RANGE), "invalid range"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_INVALID_SHIFT), "invalid shift"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_A_SQUARE), "not a square"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NOT_INITIALIZED), "not initialized"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_INVERSE), "no inverse"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_NO_SOLUTION), "no solution"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_PRIVATE_KEY_TOO_LARGE),
+    "private key too large"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_P_IS_NOT_PRIME), "p is not prime"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
+    {ERR_PACK(ERR_LIB_BN, 0, BN_R_TOO_MANY_TEMPORARY_VARIABLES),
+    "too many temporary variables"},
     {0, NULL}
 };
 
@@ -97,10 +109,9 @@ static ERR_STRING_DATA BN_str_reasons[] = {
 int ERR_load_BN_strings(void)
 {
 #ifndef OPENSSL_NO_ERR
-
     if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
-        ERR_load_strings(0, BN_str_functs);
-        ERR_load_strings(0, BN_str_reasons);
+        ERR_load_strings_const(BN_str_functs);
+        ERR_load_strings_const(BN_str_reasons);
     }
 #endif
     return 1;
diff --git a/deps/openssl/openssl/crypto/bn/bn_exp.c b/deps/openssl/openssl/crypto/bn/bn_exp.c
index a6ad475a0b..c026ffcb33 100644
--- a/deps/openssl/openssl/crypto/bn/bn_exp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_exp.c
@@ -51,10 +51,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
     }
 
     BN_CTX_start(ctx);
-    if ((r == a) || (r == p))
-        rr = BN_CTX_get(ctx);
-    else
-        rr = r;
+    rr = ((r == a) || (r == p)) ? BN_CTX_get(ctx) : r;
     v = BN_CTX_get(ctx);
     if (rr == NULL || v == NULL)
         goto err;
@@ -86,7 +83,7 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  err:
     BN_CTX_end(ctx);
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
 
 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
@@ -134,13 +131,6 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
 #define RECP_MUL_MOD
 
 #ifdef MONT_MUL_MOD
-    /*
-     * I have finally been able to take out this pre-condition of the top bit
-     * being set.  It was caused by an error in BN_div with negatives.  There
-     * was also another problem when for a^b%m a >= m.  eay 07-May-97
-     */
-    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
-
     if (BN_is_odd(m)) {
 # ifdef MONT_EXP_WORD
         if (a->top == 1 && !a->neg
@@ -165,7 +155,7 @@ int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
 #endif
 
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
 
 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
@@ -201,7 +191,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
     BN_CTX_start(ctx);
     aa = BN_CTX_get(ctx);
     val[0] = BN_CTX_get(ctx);
-    if (!aa || !val[0])
+    if (val[0] == NULL)
         goto err;
 
     BN_RECP_CTX_init(&recp);
@@ -300,7 +290,7 @@ int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
     BN_CTX_end(ctx);
     BN_RECP_CTX_free(&recp);
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
 
 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
@@ -326,7 +316,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 
     if (!BN_is_odd(m)) {
         BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
-        return (0);
+        return 0;
     }
     bits = BN_num_bits(p);
     if (bits == 0) {
@@ -344,7 +334,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     d = BN_CTX_get(ctx);
     r = BN_CTX_get(ctx);
     val[0] = BN_CTX_get(ctx);
-    if (!d || !r || !val[0])
+    if (val[0] == NULL)
         goto err;
 
     /*
@@ -366,11 +356,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         aa = val[0];
     } else
         aa = a;
-    if (BN_is_zero(aa)) {
-        BN_zero(rr);
-        ret = 1;
-        goto err;
-    }
     if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx))
         goto err;               /* 1 */
 
@@ -481,10 +466,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         BN_MONT_CTX_free(mont);
     BN_CTX_end(ctx);
     bn_check_top(rr);
-    return (ret);
+    return ret;
 }
 
-#if defined(SPARC_T4_MONT)
 static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
 {
     BN_ULONG ret = 0;
@@ -503,7 +487,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
 
     return ret & BN_MASK2;
 }
-#endif
 
 /*
  * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
@@ -610,7 +593,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
                               const BIGNUM *m, BN_CTX *ctx,
                               BN_MONT_CTX *in_mont)
 {
-    int i, bits, ret = 0, window, wvalue;
+    int i, bits, ret = 0, window, wvalue, wmask, window0;
     int top;
     BN_MONT_CTX *mont = NULL;
 
@@ -629,7 +612,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 
     if (!BN_is_odd(m)) {
         BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
-        return (0);
+        return 0;
     }
 
     top = m->top;
@@ -666,31 +649,33 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     }
 
 #ifdef RSAZ_ENABLED
-    /*
-     * If the size of the operands allow it, perform the optimized
-     * RSAZ exponentiation. For further information see
-     * crypto/bn/rsaz_exp.c and accompanying assembly modules.
-     */
-    if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
-        && rsaz_avx2_eligible()) {
-        if (NULL == bn_wexpand(rr, 16))
+    if (!a->neg) {
+        /*
+         * If the size of the operands allow it, perform the optimized
+         * RSAZ exponentiation. For further information see
+         * crypto/bn/rsaz_exp.c and accompanying assembly modules.
+         */
+        if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
+            && rsaz_avx2_eligible()) {
+            if (NULL == bn_wexpand(rr, 16))
+                goto err;
+            RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
+                                   mont->n0[0]);
+            rr->top = 16;
+            rr->neg = 0;
+            bn_correct_top(rr);
+            ret = 1;
             goto err;
-        RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
-                               mont->n0[0]);
-        rr->top = 16;
-        rr->neg = 0;
-        bn_correct_top(rr);
-        ret = 1;
-        goto err;
-    } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
-        if (NULL == bn_wexpand(rr, 8))
+        } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
+            if (NULL == bn_wexpand(rr, 8))
+                goto err;
+            RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
+            rr->top = 8;
+            rr->neg = 0;
+            bn_correct_top(rr);
+            ret = 1;
             goto err;
-        RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
-        rr->top = 8;
-        rr->neg = 0;
-        bn_correct_top(rr);
-        ret = 1;
-        goto err;
+        }
     }
 #endif
 
@@ -763,7 +748,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 
     /* prepare a^1 in Montgomery domain */
     if (a->neg || BN_ucmp(a, m) >= 0) {
-        if (!BN_mod(&am, a, m, ctx))
+        if (!BN_nnmod(&am, a, m, ctx))
             goto err;
         if (!bn_to_mont_fixed_top(&am, &am, mont, ctx))
             goto err;
@@ -861,20 +846,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         top /= 2;
         bn_flip_t4(np, mont->N.d, top);
 
-        bits--;
-        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+        /*
+         * The exponent may not have a whole number of fixed-size windows.
+         * To simplify the main loop, the initial window has between 1 and
+         * full-window-size bits such that what remains is always a whole
+         * number of windows
+         */
+        window0 = (bits - 1) % 5 + 1;
+        wmask = (1 << window0) - 1;
+        bits -= window0;
+        wvalue = bn_get_bits(p, bits) & wmask;
         bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
 
         /*
          * Scan the exponent one window at a time starting from the most
          * significant bits.
          */
-        while (bits >= 0) {
+        while (bits > 0) {
             if (bits < stride)
-                stride = bits + 1;
+                stride = bits;
             bits -= stride;
-            wvalue = bn_get_bits(p, bits + 1);
+            wvalue = bn_get_bits(p, bits);
 
             if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
                 continue;
@@ -982,32 +974,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
             bn_scatter5(tmp.d, top, powerbuf, i);
         }
 # endif
-        bits--;
-        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+        /*
+         * The exponent may not have a whole number of fixed-size windows.
+         * To simplify the main loop, the initial window has between 1 and
+         * full-window-size bits such that what remains is always a whole
+         * number of windows
+         */
+        window0 = (bits - 1) % 5 + 1;
+        wmask = (1 << window0) - 1;
+        bits -= window0;
+        wvalue = bn_get_bits(p, bits) & wmask;
         bn_gather5(tmp.d, top, powerbuf, wvalue);
 
         /*
          * Scan the exponent one window at a time starting from the most
          * significant bits.
          */
-        if (top & 7)
-            while (bits >= 0) {
-                for (wvalue = 0, i = 0; i < 5; i++, bits--)
-                    wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
-
+        if (top & 7) {
+            while (bits > 0) {
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
                 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
-                                    wvalue);
+                                    bn_get_bits5(p->d, bits -= 5));
+            }
         } else {
-            while (bits >= 0) {
-                wvalue = bn_get_bits5(p->d, bits - 4);
-                bits -= 5;
-                bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
+            while (bits > 0) {
+                bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top,
+                          bn_get_bits5(p->d, bits -= 5));
             }
         }
 
@@ -1049,28 +1045,45 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
             }
         }
 
-        bits--;
-        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
-            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+        /*
+         * The exponent may not have a whole number of fixed-size windows.
+         * To simplify the main loop, the initial window has between 1 and
+         * full-window-size bits such that what remains is always a whole
+         * number of windows
+         */
+        window0 = (bits - 1) % window + 1;
+        wmask = (1 << window0) - 1;
+        bits -= window0;
+        wvalue = bn_get_bits(p, bits) & wmask;
         if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
                                             window))
             goto err;
 
+        wmask = (1 << window) - 1;
         /*
          * Scan the exponent one window at a time starting from the most
          * significant bits.
          */
-        while (bits >= 0) {
-            wvalue = 0;         /* The 'value' of the window */
+        while (bits > 0) {
 
-            /* Scan the window, squaring the result as we go */
-            for (i = 0; i < window; i++, bits--) {
+            /* Square the result window-size times */
+            for (i = 0; i < window; i++)
                 if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx))
                     goto err;
-                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
-            }
 
             /*
+             * Get a window's worth of bits from the exponent
+             * This avoids calling BN_is_bit_set for each bit, which
+             * is not only slower but also makes each bit vulnerable to
+             * EM (and likely other) side-channel attacks like One&Done
+             * (for details see "One&Done: A Single-Decryption EM-Based
+             *  Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam,
+             *  H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and
+             *  M. Prvulovic, in USENIX Security'18)
+             */
+            bits -= window;
+            wvalue = bn_get_bits(p, bits) & wmask;
+            /*
              * Fetch the appropriate pre-computed value from the pre-buf
              */
             if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
@@ -1108,7 +1121,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         OPENSSL_free(powerbufFree);
     }
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
@@ -1118,7 +1131,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
     int b, bits, ret = 0;
     int r_is_one;
     BN_ULONG w, next_w;
-    BIGNUM *d, *r, *t;
+    BIGNUM *r, *t;
     BIGNUM *swap_tmp;
 #define BN_MOD_MUL_WORD(r, w, m) \
                 (BN_mul_word(r, (w)) && \
@@ -1149,7 +1162,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
 
     if (!BN_is_odd(m)) {
         BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
-        return (0);
+        return 0;
     }
     if (m->top == 1)
         a %= m->d[0];           /* make sure that 'a' is reduced */
@@ -1172,10 +1185,9 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
     }
 
     BN_CTX_start(ctx);
-    d = BN_CTX_get(ctx);
     r = BN_CTX_get(ctx);
     t = BN_CTX_get(ctx);
-    if (d == NULL || r == NULL || t == NULL)
+    if (t == NULL)
         goto err;
 
     if (in_mont != NULL)
@@ -1256,7 +1268,7 @@ int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
         BN_MONT_CTX_free(mont);
     BN_CTX_end(ctx);
     bn_check_top(rr);
-    return (ret);
+    return ret;
 }
 
 /* The old fallback, simple version :-) */
@@ -1292,7 +1304,7 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
     BN_CTX_start(ctx);
     d = BN_CTX_get(ctx);
     val[0] = BN_CTX_get(ctx);
-    if (!d || !val[0])
+    if (val[0] == NULL)
         goto err;
 
     if (!BN_nnmod(val[0], a, m, ctx))
@@ -1377,5 +1389,5 @@ int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  err:
     BN_CTX_end(ctx);
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_exp2.c b/deps/openssl/openssl/crypto/bn/bn_exp2.c
index 5141c21f6d..082c9286a0 100644
--- a/deps/openssl/openssl/crypto/bn/bn_exp2.c
+++ b/deps/openssl/openssl/crypto/bn/bn_exp2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -34,7 +34,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
 
     if (!(m->d[0] & 1)) {
         BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
-        return (0);
+        return 0;
     }
     bits1 = BN_num_bits(p1);
     bits2 = BN_num_bits(p2);
@@ -50,7 +50,7 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
     r = BN_CTX_get(ctx);
     val1[0] = BN_CTX_get(ctx);
     val2[0] = BN_CTX_get(ctx);
-    if (!d || !r || !val1[0] || !val2[0])
+    if (val2[0] == NULL)
         goto err;
 
     if (in_mont != NULL)
@@ -197,5 +197,5 @@ int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
         BN_MONT_CTX_free(mont);
     BN_CTX_end(ctx);
     bn_check_top(rr);
-    return (ret);
+    return ret;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_gcd.c b/deps/openssl/openssl/crypto/bn/bn_gcd.c
index bed231c8fa..0091ea4e08 100644
--- a/deps/openssl/openssl/crypto/bn/bn_gcd.c
+++ b/deps/openssl/openssl/crypto/bn/bn_gcd.c
@@ -23,7 +23,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
     BN_CTX_start(ctx);
     a = BN_CTX_get(ctx);
     b = BN_CTX_get(ctx);
-    if (a == NULL || b == NULL)
+    if (b == NULL)
         goto err;
 
     if (BN_copy(a, in_a) == NULL)
@@ -48,7 +48,7 @@ int BN_gcd(BIGNUM *r, const BIGNUM *in_a, const BIGNUM *in_b, BN_CTX *ctx)
  err:
     BN_CTX_end(ctx);
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
 
 static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
@@ -111,9 +111,9 @@ static BIGNUM *euclid(BIGNUM *a, BIGNUM *b)
             goto err;
     }
     bn_check_top(a);
-    return (a);
+    return a;
  err:
-    return (NULL);
+    return NULL;
 }
 
 /* solves ax == 1 (mod n) */
@@ -448,7 +448,7 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in,
         BN_free(R);
     BN_CTX_end(ctx);
     bn_check_top(ret);
-    return (ret);
+    return ret;
 }
 
 /*
@@ -619,5 +619,5 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
         BN_free(R);
     BN_CTX_end(ctx);
     bn_check_top(ret);
-    return (ret);
+    return ret;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_gf2m.c b/deps/openssl/openssl/crypto/bn/bn_gf2m.c
index d80f3ec940..34d8b69c1e 100644
--- a/deps/openssl/openssl/crypto/bn/bn_gf2m.c
+++ b/deps/openssl/openssl/crypto/bn/bn_gf2m.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -7,17 +8,6 @@
  * https://www.openssl.org/source/license.html
  */
 
-/* ====================================================================
- * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
- *
- * The Elliptic Curve Public-Key Crypto Library (ECC Code) included
- * herein is developed by SUN MICROSYSTEMS, INC., and is contributed
- * to the OpenSSL project.
- *
- * The ECC Code is licensed pursuant to the OpenSSL open source
- * license provided below.
- */
-
 #include <assert.h>
 #include <limits.h>
 #include <stdio.h>
@@ -559,7 +549,8 @@ int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
  * Hernandez, J.L., and Menezes, A.  "Software Implementation of Elliptic
  * Curve Cryptography Over Binary Fields".
  */
-int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
+static int BN_GF2m_mod_inv_vartime(BIGNUM *r, const BIGNUM *a,
+                                   const BIGNUM *p, BN_CTX *ctx)
 {
     BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp;
     int ret = 0;
@@ -569,13 +560,11 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
 
     BN_CTX_start(ctx);
 
-    if ((b = BN_CTX_get(ctx)) == NULL)
-        goto err;
-    if ((c = BN_CTX_get(ctx)) == NULL)
-        goto err;
-    if ((u = BN_CTX_get(ctx)) == NULL)
-        goto err;
-    if ((v = BN_CTX_get(ctx)) == NULL)
+    b = BN_CTX_get(ctx);
+    c = BN_CTX_get(ctx);
+    u = BN_CTX_get(ctx);
+    v = BN_CTX_get(ctx);
+    if (v == NULL)
         goto err;
 
     if (!BN_GF2m_mod(u, a, p))
@@ -727,6 +716,46 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
     return ret;
 }
 
+/*-
+ * Wrapper for BN_GF2m_mod_inv_vartime that blinds the input before calling.
+ * This is not constant time.
+ * But it does eliminate first order deduction on the input.
+ */
+int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
+{
+    BIGNUM *b = NULL;
+    int ret = 0;
+
+    BN_CTX_start(ctx);
+    if ((b = BN_CTX_get(ctx)) == NULL)
+        goto err;
+
+    /* generate blinding value */
+    do {
+        if (!BN_priv_rand(b, BN_num_bits(p) - 1,
+                          BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+            goto err;
+    } while (BN_is_zero(b));
+
+    /* r := a * b */
+    if (!BN_GF2m_mod_mul(r, a, b, p, ctx))
+        goto err;
+
+    /* r := 1/(a * b) */
+    if (!BN_GF2m_mod_inv_vartime(r, r, p, ctx))
+        goto err;
+
+    /* r := b/(a * b) = 1/a */
+    if (!BN_GF2m_mod_mul(r, r, b, p, ctx))
+        goto err;
+
+    ret = 1;
+
+ err:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
 /*
  * Invert xx, reduce modulo p, and store the result in r. r could be xx.
  * This function calls down to the BN_GF2m_mod_inv implementation; this
@@ -754,7 +783,6 @@ int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[],
     return ret;
 }
 
-# ifndef OPENSSL_SUN_GF2M_DIV
 /*
  * Divide y by x, reduce modulo p, and store the result in r. r could be x
  * or y, x could equal y.
@@ -785,94 +813,6 @@ int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
     BN_CTX_end(ctx);
     return ret;
 }
-# else
-/*
- * Divide y by x, reduce modulo p, and store the result in r. r could be x
- * or y, x could equal y. Uses algorithm Modular_Division_GF(2^m) from
- * Chang-Shantz, S.  "From Euclid's GCD to Montgomery Multiplication to the
- * Great Divide".
- */
-int BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x,
-                    const BIGNUM *p, BN_CTX *ctx)
-{
-    BIGNUM *a, *b, *u, *v;
-    int ret = 0;
-
-    bn_check_top(y);
-    bn_check_top(x);
-    bn_check_top(p);
-
-    BN_CTX_start(ctx);
-
-    a = BN_CTX_get(ctx);
-    b = BN_CTX_get(ctx);
-    u = BN_CTX_get(ctx);
-    v = BN_CTX_get(ctx);
-    if (v == NULL)
-        goto err;
-
-    /* reduce x and y mod p */
-    if (!BN_GF2m_mod(u, y, p))
-        goto err;
-    if (!BN_GF2m_mod(a, x, p))
-        goto err;
-    if (!BN_copy(b, p))
-        goto err;
-
-    while (!BN_is_odd(a)) {
-        if (!BN_rshift1(a, a))
-            goto err;
-        if (BN_is_odd(u))
-            if (!BN_GF2m_add(u, u, p))
-                goto err;
-        if (!BN_rshift1(u, u))
-            goto err;
-    }
-
-    do {
-        if (BN_GF2m_cmp(b, a) > 0) {
-            if (!BN_GF2m_add(b, b, a))
-                goto err;
-            if (!BN_GF2m_add(v, v, u))
-                goto err;
-            do {
-                if (!BN_rshift1(b, b))
-                    goto err;
-                if (BN_is_odd(v))
-                    if (!BN_GF2m_add(v, v, p))
-                        goto err;
-                if (!BN_rshift1(v, v))
-                    goto err;
-            } while (!BN_is_odd(b));
-        } else if (BN_abs_is_word(a, 1))
-            break;
-        else {
-            if (!BN_GF2m_add(a, a, b))
-                goto err;
-            if (!BN_GF2m_add(u, u, v))
-                goto err;
-            do {
-                if (!BN_rshift1(a, a))
-                    goto err;
-                if (BN_is_odd(u))
-                    if (!BN_GF2m_add(u, u, p))
-                        goto err;
-                if (!BN_rshift1(u, u))
-                    goto err;
-            } while (!BN_is_odd(a));
-        }
-    } while (1);
-
-    if (!BN_copy(r, u))
-        goto err;
-    bn_check_top(r);
-    ret = 1;
-
- err:
-    BN_CTX_end(ctx);
-    return ret;
-}
-# endif
 
 /*
  * Divide yy by xx, reduce modulo p, and store the result in r. r could be xx
@@ -918,7 +858,7 @@ int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
     bn_check_top(b);
 
     if (BN_is_zero(b))
-        return (BN_one(r));
+        return BN_one(r);
 
     if (BN_abs_is_word(b, 1))
         return (BN_copy(r, a) != NULL);
@@ -1091,7 +1031,7 @@ int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[],
         if (tmp == NULL)
             goto err;
         do {
-            if (!BN_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+            if (!BN_priv_rand(rho, p[0], BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
                 goto err;
             if (!BN_GF2m_mod_arr(rho, rho, p))
                 goto err;
diff --git a/deps/openssl/openssl/crypto/bn/bn_intern.c b/deps/openssl/openssl/crypto/bn/bn_intern.c
index 7b25927f9b..46bc97575d 100644
--- a/deps/openssl/openssl/crypto/bn/bn_intern.c
+++ b/deps/openssl/openssl/crypto/bn/bn_intern.c
@@ -143,11 +143,6 @@ int bn_get_top(const BIGNUM *a)
     return a->top;
 }
 
-void bn_set_top(BIGNUM *a, int top)
-{
-    a->top = top;
-}
-
 int bn_get_dmax(const BIGNUM *a)
 {
     return a->dmax;
@@ -202,13 +197,3 @@ int bn_set_words(BIGNUM *a, const BN_ULONG *words, int num_words)
     bn_correct_top(a);
     return 1;
 }
-
-size_t bn_sizeof_BIGNUM(void)
-{
-    return sizeof(BIGNUM);
-}
-
-BIGNUM *bn_array_el(BIGNUM *base, int el)
-{
-    return &base[el];
-}
diff --git a/deps/openssl/openssl/crypto/bn/bn_lcl.h b/deps/openssl/openssl/crypto/bn/bn_lcl.h
index 4d9808f5b8..8a36db2e8b 100644
--- a/deps/openssl/openssl/crypto/bn/bn_lcl.h
+++ b/deps/openssl/openssl/crypto/bn/bn_lcl.h
@@ -23,10 +23,6 @@
 
 # include "internal/bn_int.h"
 
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
 /*
  * These preprocessor symbols control various aspects of the bignum headers
  * and library code. They're not defined by any "normal" configuration, as
@@ -156,11 +152,6 @@ extern "C" {
  */
 #  define BN_FLG_FIXED_TOP 0x10000
 #  ifdef BN_DEBUG_RAND
-/* To avoid "make update" cvs wars due to BN_DEBUG, use some tricks */
-#   ifndef RAND_bytes
-int RAND_bytes(unsigned char *buf, int num);
-#    define BN_DEBUG_TRIX
-#   endif
 #   define bn_pollute(a) \
         do { \
             const BIGNUM *_bnum1 = (a); \
@@ -176,9 +167,6 @@ int RAND_bytes(unsigned char *buf, int num);
                        sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \
             } \
         } while(0)
-#   ifdef BN_DEBUG_TRIX
-#    undef RAND_bytes
-#   endif
 #  else
 #   define bn_pollute(a)
 #  endif
@@ -187,9 +175,9 @@ int RAND_bytes(unsigned char *buf, int num);
                 const BIGNUM *_bnum2 = (a); \
                 if (_bnum2 != NULL) { \
                         int _top = _bnum2->top; \
-                        OPENSSL_assert((_top == 0 && !_bnum2->neg) || \
-                               (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
-                                         || _bnum2->d[_top - 1] != 0))); \
+                        (void)ossl_assert((_top == 0 && !_bnum2->neg) || \
+                                  (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
+                                            || _bnum2->d[_top - 1] != 0))); \
                         bn_pollute(_bnum2); \
                 } \
         } while(0)
@@ -200,8 +188,8 @@ int RAND_bytes(unsigned char *buf, int num);
 #  define bn_wcheck_size(bn, words) \
         do { \
                 const BIGNUM *_bnum2 = (bn); \
-                OPENSSL_assert((words) <= (_bnum2)->dmax && \
-                        (words) >= (_bnum2)->top); \
+                assert((words) <= (_bnum2)->dmax && \
+                       (words) >= (_bnum2)->top); \
                 /* avoid unused variable warning with NDEBUG */ \
                 (void)(_bnum2); \
         } while(0)
@@ -370,59 +358,58 @@ struct bn_gencb_st {
 # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
 /*
  * BN_UMULT_HIGH section.
- *
- * No, I'm not trying to overwhelm you when stating that the
- * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
- * you to be impressed when I say that if the compiler doesn't
- * support 2*N integer type, then you have to replace every N*N
- * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
- * and additions which unavoidably results in severe performance
- * penalties. Of course provided that the hardware is capable of
- * producing 2*N result... That's when you normally start
- * considering assembler implementation. However! It should be
- * pointed out that some CPUs (most notably Alpha, PowerPC and
- * upcoming IA-64 family:-) provide *separate* instruction
- * calculating the upper half of the product placing the result
- * into a general purpose register. Now *if* the compiler supports
- * inline assembler, then it's not impossible to implement the
- * "bignum" routines (and have the compiler optimize 'em)
- * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
- * macro is about:-)
- *
- *                                      <appro@fy.chalmers.se>
+ * If the compiler doesn't support 2*N integer type, then you have to
+ * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
+ * shifts and additions which unavoidably results in severe performance
+ * penalties. Of course provided that the hardware is capable of producing
+ * 2*N result... That's when you normally start considering assembler
+ * implementation. However! It should be pointed out that some CPUs (e.g.,
+ * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
+ * the upper half of the product placing the result into a general
+ * purpose register. Now *if* the compiler supports inline assembler,
+ * then it's not impossible to implement the "bignum" routines (and have
+ * the compiler optimize 'em) exhibiting "native" performance in C. That's
+ * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
+ * support 2*64 integer type, which is also used here.
  */
-#  if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
+      (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
+#   define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
+#   define BN_UMULT_LOHI(low,high,a,b) ({       \
+        __uint128_t ret=(__uint128_t)(a)*(b);   \
+        (high)=ret>>64; (low)=ret;      })
+#  elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 #   if defined(__DECC)
 #    include <c_asm.h>
 #    define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
 #   elif defined(__GNUC__) && __GNUC__>=2
-#    define BN_UMULT_HIGH(a,b)   ({      \
+#    define BN_UMULT_HIGH(a,b)   ({     \
         register BN_ULONG ret;          \
         asm ("umulh     %1,%2,%0"       \
              : "=r"(ret)                \
              : "r"(a), "r"(b));         \
-        ret;                    })
+        ret;                      })
 #   endif                       /* compiler */
-#  elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
+#  elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG)
 #   if defined(__GNUC__) && __GNUC__>=2
-#    define BN_UMULT_HIGH(a,b)   ({      \
+#    define BN_UMULT_HIGH(a,b)   ({     \
         register BN_ULONG ret;          \
         asm ("mulhdu    %0,%1,%2"       \
              : "=r"(ret)                \
              : "r"(a), "r"(b));         \
-        ret;                    })
+        ret;                      })
 #   endif                       /* compiler */
 #  elif (defined(__x86_64) || defined(__x86_64__)) && \
        (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 #   if defined(__GNUC__) && __GNUC__>=2
-#    define BN_UMULT_HIGH(a,b)   ({      \
+#    define BN_UMULT_HIGH(a,b)   ({     \
         register BN_ULONG ret,discard;  \
         asm ("mulq      %3"             \
              : "=a"(discard),"=d"(ret)  \
              : "a"(a), "g"(b)           \
              : "cc");                   \
-        ret;                    })
-#    define BN_UMULT_LOHI(low,high,a,b)  \
+        ret;                      })
+#    define BN_UMULT_LOHI(low,high,a,b) \
         asm ("mulq      %3"             \
                 : "=a"(low),"=d"(high)  \
                 : "a"(a),"g"(b)         \
@@ -439,43 +426,29 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
 #   endif
 #  elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
 #   if defined(__GNUC__) && __GNUC__>=2
-#    if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
-      /* "h" constraint is not an option on R6 and was removed in 4.4 */
-#     define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
-#     define BN_UMULT_LOHI(low,high,a,b) ({     \
-        __uint128_t ret=(__uint128_t)(a)*(b);   \
-        (high)=ret>>64; (low)=ret;       })
-#    else
-#     define BN_UMULT_HIGH(a,b) ({      \
+#    define BN_UMULT_HIGH(a,b) ({       \
         register BN_ULONG ret;          \
         asm ("dmultu    %1,%2"          \
              : "=h"(ret)                \
              : "r"(a), "r"(b) : "l");   \
         ret;                    })
-#     define BN_UMULT_LOHI(low,high,a,b)\
+#    define BN_UMULT_LOHI(low,high,a,b) \
         asm ("dmultu    %2,%3"          \
              : "=l"(low),"=h"(high)     \
              : "r"(a), "r"(b));
-#    endif
 #   endif
 #  elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
 #   if defined(__GNUC__) && __GNUC__>=2
-#    define BN_UMULT_HIGH(a,b)   ({      \
+#    define BN_UMULT_HIGH(a,b)   ({     \
         register BN_ULONG ret;          \
         asm ("umulh     %0,%1,%2"       \
              : "=r"(ret)                \
              : "r"(a), "r"(b));         \
-        ret;                    })
+        ret;                      })
 #   endif
 #  endif                        /* cpu */
 # endif                         /* OPENSSL_NO_ASM */
 
-/*************************************************************
- * Using the long long type
- */
-# define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
-# define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
-
 # ifdef BN_DEBUG_RAND
 #  define bn_clear_top2max(a) \
         { \
@@ -489,6 +462,12 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
 # endif
 
 # ifdef BN_LLONG
+/*******************************************************************
+ * Using the long long type, has to be twice as wide as BN_ULONG...
+ */
+#  define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
+#  define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
+
 #  define mul_add(r,a,w,c) { \
         BN_ULLONG t; \
         t=(BN_ULLONG)w * (a) + (r) + (c); \
@@ -666,10 +645,6 @@ void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                           BN_ULONG *t);
-void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
-                 BN_ULONG *t);
-BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
-                           int cl, int dl);
 BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                            int cl, int dl);
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
@@ -681,8 +656,6 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in,
 
 int bn_probable_prime_dh(BIGNUM *rnd, int bits,
                          const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
-int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx);
-int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx);
 
 static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
 {
@@ -695,8 +668,4 @@ static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
     return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2);
 }
 
-#ifdef  __cplusplus
-}
-#endif
-
 #endif
diff --git a/deps/openssl/openssl/crypto/bn/bn_lib.c b/deps/openssl/openssl/crypto/bn/bn_lib.c
index 3f3c7bbb2f..80f910c807 100644
--- a/deps/openssl/openssl/crypto/bn/bn_lib.c
+++ b/deps/openssl/openssl/crypto/bn/bn_lib.c
@@ -66,15 +66,15 @@ void BN_set_params(int mult, int high, int low, int mont)
 int BN_get_params(int which)
 {
     if (which == 0)
-        return (bn_limit_bits);
+        return bn_limit_bits;
     else if (which == 1)
-        return (bn_limit_bits_high);
+        return bn_limit_bits_high;
     else if (which == 2)
-        return (bn_limit_bits_low);
+        return bn_limit_bits_low;
     else if (which == 3)
-        return (bn_limit_bits_mont);
+        return bn_limit_bits_mont;
     else
-        return (0);
+        return 0;
 }
 #endif
 
@@ -84,7 +84,7 @@ const BIGNUM *BN_value_one(void)
     static const BIGNUM const_one =
         { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA };
 
-    return (&const_one);
+    return &const_one;
 }
 
 int BN_num_bits_word(BN_ULONG l)
@@ -153,37 +153,26 @@ static void bn_free_d(BIGNUM *a)
 
 void BN_clear_free(BIGNUM *a)
 {
-    int i;
-
     if (a == NULL)
         return;
-    bn_check_top(a);
-    if (a->d != NULL) {
+    if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
         OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0]));
-        if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
-            bn_free_d(a);
+        bn_free_d(a);
     }
-    i = BN_get_flags(a, BN_FLG_MALLOCED);
-    OPENSSL_cleanse(a, sizeof(*a));
-    if (i)
+    if (BN_get_flags(a, BN_FLG_MALLOCED)) {
+        OPENSSL_cleanse(a, sizeof(*a));
         OPENSSL_free(a);
+    }
 }
 
 void BN_free(BIGNUM *a)
 {
     if (a == NULL)
         return;
-    bn_check_top(a);
     if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
         bn_free_d(a);
     if (a->flags & BN_FLG_MALLOCED)
         OPENSSL_free(a);
-    else {
-#if OPENSSL_API_COMPAT < 0x00908000L
-        a->flags |= BN_FLG_FREE;
-#endif
-        a->d = NULL;
-    }
 }
 
 void bn_init(BIGNUM *a)
@@ -200,11 +189,11 @@ BIGNUM *BN_new(void)
 
     if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
         BNerr(BN_F_BN_NEW, ERR_R_MALLOC_FAILURE);
-        return (NULL);
+        return NULL;
     }
     ret->flags = BN_FLG_MALLOCED;
     bn_check_top(ret);
-    return (ret);
+    return ret;
 }
 
  BIGNUM *BN_secure_new(void)
@@ -212,16 +201,14 @@ BIGNUM *BN_new(void)
      BIGNUM *ret = BN_new();
      if (ret != NULL)
          ret->flags |= BN_FLG_SECURE;
-     return (ret);
+     return ret;
  }
 
 /* This is used by bn_expand2() */
 /* The caller MUST check that words > b->dmax before calling this */
 static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
 {
-    BN_ULONG *A, *a = NULL;
-    const BN_ULONG *B;
-    int i;
+    BN_ULONG *a = NULL;
 
     if (words > (INT_MAX / (4 * BN_BITS2))) {
         BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_BIGNUM_TOO_LONG);
@@ -229,62 +216,22 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
     }
     if (BN_get_flags(b, BN_FLG_STATIC_DATA)) {
         BNerr(BN_F_BN_EXPAND_INTERNAL, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
-        return (NULL);
+        return NULL;
     }
     if (BN_get_flags(b, BN_FLG_SECURE))
-        a = A = OPENSSL_secure_zalloc(words * sizeof(*a));
+        a = OPENSSL_secure_zalloc(words * sizeof(*a));
     else
-        a = A = OPENSSL_zalloc(words * sizeof(*a));
-    if (A == NULL) {
+        a = OPENSSL_zalloc(words * sizeof(*a));
+    if (a == NULL) {
         BNerr(BN_F_BN_EXPAND_INTERNAL, ERR_R_MALLOC_FAILURE);
-        return (NULL);
+        return NULL;
     }
 
-#if 1
-    B = b->d;
-    /* Check if the previous number needs to be copied */
-    if (B != NULL) {
-        for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) {
-            /*
-             * The fact that the loop is unrolled
-             * 4-wise is a tribute to Intel. It's
-             * the one that doesn't have enough
-             * registers to accommodate more data.
-             * I'd unroll it 8-wise otherwise:-)
-             *
-             *              <appro@fy.chalmers.se>
-             */
-            BN_ULONG a0, a1, a2, a3;
-            a0 = B[0];
-            a1 = B[1];
-            a2 = B[2];
-            a3 = B[3];
-            A[0] = a0;
-            A[1] = a1;
-            A[2] = a2;
-            A[3] = a3;
-        }
-        switch (b->top & 3) {
-        case 3:
-            A[2] = B[2];
-            /* fall thru */
-        case 2:
-            A[1] = B[1];
-            /* fall thru */
-        case 1:
-            A[0] = B[0];
-            /* fall thru */
-        case 0:
-            /* Without the "case 0" some old optimizers got this wrong. */
-            ;
-        }
-    }
-#else
-    memset(A, 0, sizeof(*A) * words);
-    memcpy(A, b->d, sizeof(b->d[0]) * b->top);
-#endif
+    assert(b->top <= words);
+    if (b->top > 0)
+        memcpy(a, b->d, sizeof(*a) * b->top);
 
-    return (a);
+    return a;
 }
 
 /*
@@ -333,53 +280,21 @@ BIGNUM *BN_dup(const BIGNUM *a)
 
 BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
 {
-    int i;
-    BN_ULONG *A;
-    const BN_ULONG *B;
-
     bn_check_top(b);
 
     if (a == b)
-        return (a);
+        return a;
     if (bn_wexpand(a, b->top) == NULL)
-        return (NULL);
-
-#if 1
-    A = a->d;
-    B = b->d;
-    for (i = b->top >> 2; i > 0; i--, A += 4, B += 4) {
-        BN_ULONG a0, a1, a2, a3;
-        a0 = B[0];
-        a1 = B[1];
-        a2 = B[2];
-        a3 = B[3];
-        A[0] = a0;
-        A[1] = a1;
-        A[2] = a2;
-        A[3] = a3;
-    }
-    /* ultrix cc workaround, see comments in bn_expand_internal */
-    switch (b->top & 3) {
-    case 3:
-        A[2] = B[2];
-        /* fall thru */
-    case 2:
-        A[1] = B[1];
-        /* fall thru */
-    case 1:
-        A[0] = B[0];
-        /* fall thru */
-    case 0:;
-    }
-#else
-    memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
-#endif
+        return NULL;
+
+    if (b->top > 0)
+        memcpy(a->d, b->d, sizeof(b->d[0]) * b->top);
 
     a->neg = b->neg;
     a->top = b->top;
     a->flags |= b->flags & BN_FLG_FIXED_TOP;
     bn_check_top(a);
-    return (a);
+    return a;
 }
 
 #define FLAGS_DATA(flags) ((flags) & (BN_FLG_STATIC_DATA \
@@ -445,13 +360,13 @@ int BN_set_word(BIGNUM *a, BN_ULONG w)
 {
     bn_check_top(a);
     if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL)
-        return (0);
+        return 0;
     a->neg = 0;
     a->d[0] = w;
     a->top = (w ? 1 : 0);
     a->flags &= ~BN_FLG_FIXED_TOP;
     bn_check_top(a);
-    return (1);
+    return 1;
 }
 
 BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
@@ -464,7 +379,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
     if (ret == NULL)
         ret = bn = BN_new();
     if (ret == NULL)
-        return (NULL);
+        return NULL;
     bn_check_top(ret);
     /* Skip leading zero's. */
     for ( ; len > 0 && *s == 0; s++, len--)
@@ -472,7 +387,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
     n = len;
     if (n == 0) {
         ret->top = 0;
-        return (ret);
+        return ret;
     }
     i = ((n - 1) / BN_BYTES) + 1;
     m = ((n - 1) % (BN_BYTES));
@@ -496,7 +411,7 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
      * bit set (-ve number)
      */
     bn_correct_top(ret);
-    return (ret);
+    return ret;
 }
 
 /* ignore negative */
@@ -564,7 +479,7 @@ BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
     if (ret == NULL)
         ret = bn = BN_new();
     if (ret == NULL)
-        return (NULL);
+        return NULL;
     bn_check_top(ret);
     s += len;
     /* Skip trailing zeroes. */
@@ -631,7 +546,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
 
     i = a->top - b->top;
     if (i != 0)
-        return (i);
+        return i;
     ap = a->d;
     bp = b->d;
     for (i = a->top - 1; i >= 0; i--) {
@@ -640,7 +555,7 @@ int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
         if (t1 != t2)
             return ((t1 > t2) ? 1 : -1);
     }
-    return (0);
+    return 0;
 }
 
 int BN_cmp(const BIGNUM *a, const BIGNUM *b)
@@ -651,11 +566,11 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
 
     if ((a == NULL) || (b == NULL)) {
         if (a != NULL)
-            return (-1);
+            return -1;
         else if (b != NULL)
-            return (1);
+            return 1;
         else
-            return (0);
+            return 0;
     }
 
     bn_check_top(a);
@@ -663,9 +578,9 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
 
     if (a->neg != b->neg) {
         if (a->neg)
-            return (-1);
+            return -1;
         else
-            return (1);
+            return 1;
     }
     if (a->neg == 0) {
         gt = 1;
@@ -676,18 +591,18 @@ int BN_cmp(const BIGNUM *a, const BIGNUM *b)
     }
 
     if (a->top > b->top)
-        return (gt);
+        return gt;
     if (a->top < b->top)
-        return (lt);
+        return lt;
     for (i = a->top - 1; i >= 0; i--) {
         t1 = a->d[i];
         t2 = b->d[i];
         if (t1 > t2)
-            return (gt);
+            return gt;
         if (t1 < t2)
-            return (lt);
+            return lt;
     }
-    return (0);
+    return 0;
 }
 
 int BN_set_bit(BIGNUM *a, int n)
@@ -701,7 +616,7 @@ int BN_set_bit(BIGNUM *a, int n)
     j = n % BN_BITS2;
     if (a->top <= i) {
         if (bn_wexpand(a, i + 1) == NULL)
-            return (0);
+            return 0;
         for (k = a->top; k < i + 1; k++)
             a->d[k] = 0;
         a->top = i + 1;
@@ -710,7 +625,7 @@ int BN_set_bit(BIGNUM *a, int n)
 
     a->d[i] |= (((BN_ULONG)1) << j);
     bn_check_top(a);
-    return (1);
+    return 1;
 }
 
 int BN_clear_bit(BIGNUM *a, int n)
@@ -724,11 +639,11 @@ int BN_clear_bit(BIGNUM *a, int n)
     i = n / BN_BITS2;
     j = n % BN_BITS2;
     if (a->top <= i)
-        return (0);
+        return 0;
 
     a->d[i] &= (~(((BN_ULONG)1) << j));
     bn_correct_top(a);
-    return (1);
+    return 1;
 }
 
 int BN_is_bit_set(const BIGNUM *a, int n)
@@ -764,7 +679,7 @@ int BN_mask_bits(BIGNUM *a, int n)
         a->d[w] &= ~(BN_MASK2 << b);
     }
     bn_correct_top(a);
-    return (1);
+    return 1;
 }
 
 void BN_set_negative(BIGNUM *a, int b)
@@ -790,7 +705,7 @@ int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n)
         if (aa != bb)
             return ((aa > bb) ? 1 : -1);
     }
-    return (0);
+    return 0;
 }
 
 /*
@@ -1000,7 +915,7 @@ BN_GENCB *BN_GENCB_new(void)
 
     if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
         BNerr(BN_F_BN_GENCB_NEW, ERR_R_MALLOC_FAILURE);
-        return (NULL);
+        return NULL;
     }
 
     return ret;
diff --git a/deps/openssl/openssl/crypto/bn/bn_mod.c b/deps/openssl/openssl/crypto/bn/bn_mod.c
index 2e98035bd8..712fc8ac14 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mod.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mod.c
@@ -216,7 +216,7 @@ int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
     ret = 1;
  err:
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
diff --git a/deps/openssl/openssl/crypto/bn/bn_mont.c b/deps/openssl/openssl/crypto/bn/bn_mont.c
index 41214334b8..393d27c392 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mont.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mont.c
@@ -44,12 +44,12 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
 #if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
     if (num > 1 && a->top == num && b->top == num) {
         if (bn_wexpand(r, num) == NULL)
-            return (0);
+            return 0;
         if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
             r->neg = a->neg ^ b->neg;
             r->top = num;
             r->flags |= BN_FLG_FIXED_TOP;
-            return (1);
+            return 1;
         }
     }
 #endif
@@ -81,7 +81,7 @@ int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
     ret = 1;
  err:
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 #ifdef MONT_WORD
@@ -96,17 +96,18 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
     nl = n->top;
     if (nl == 0) {
         ret->top = 0;
-        return (1);
+        return 1;
     }
 
     max = (2 * nl);             /* carry is stored separately */
     if (bn_wexpand(r, max) == NULL)
-        return (0);
+        return 0;
 
     r->neg ^= n->neg;
     np = n->d;
     rp = r->d;
 
+    /* clear the top words of T */
     for (rtop = r->top, i = 0; i < max; i++) {
         v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
         rp[i] &= v;
@@ -130,7 +131,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
     }
 
     if (bn_wexpand(ret, nl) == NULL)
-        return (0);
+        return 0;
     ret->top = nl;
     ret->flags |= BN_FLG_FIXED_TOP;
     ret->neg = r->neg;
@@ -154,7 +155,7 @@ static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
         ap[i] = 0;
     }
 
-    return (1);
+    return 1;
 }
 #endif                          /* MONT_WORD */
 
@@ -188,7 +189,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
     BN_CTX_start(ctx);
     t1 = BN_CTX_get(ctx);
     t2 = BN_CTX_get(ctx);
-    if (t1 == NULL || t2 == NULL)
+    if (t2 == NULL)
         goto err;
 
     if (!BN_copy(t1, a))
@@ -215,7 +216,7 @@ int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
  err:
     BN_CTX_end(ctx);
 #endif                          /* MONT_WORD */
-    return (retn);
+    return retn;
 }
 
 int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
@@ -228,20 +229,22 @@ BN_MONT_CTX *BN_MONT_CTX_new(void)
 {
     BN_MONT_CTX *ret;
 
-    if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL)
-        return (NULL);
+    if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
 
     BN_MONT_CTX_init(ret);
     ret->flags = BN_FLG_MALLOCED;
-    return (ret);
+    return ret;
 }
 
 void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
 {
     ctx->ri = 0;
-    bn_init(&(ctx->RR));
-    bn_init(&(ctx->N));
-    bn_init(&(ctx->Ni));
+    bn_init(&ctx->RR);
+    bn_init(&ctx->N);
+    bn_init(&ctx->Ni);
     ctx->n0[0] = ctx->n0[1] = 0;
     ctx->flags = 0;
 }
@@ -250,10 +253,9 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont)
 {
     if (mont == NULL)
         return;
-
-    BN_clear_free(&(mont->RR));
-    BN_clear_free(&(mont->N));
-    BN_clear_free(&(mont->Ni));
+    BN_clear_free(&mont->RR);
+    BN_clear_free(&mont->N);
+    BN_clear_free(&mont->Ni);
     if (mont->flags & BN_FLG_MALLOCED)
         OPENSSL_free(mont);
 }
@@ -409,7 +411,7 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
 BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
 {
     if (to == from)
-        return (to);
+        return to;
 
     if (!BN_copy(&(to->RR), &(from->RR)))
         return NULL;
@@ -420,7 +422,7 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
     to->ri = from->ri;
     to->n0[0] = from->n0[0];
     to->n0[1] = from->n0[1];
-    return (to);
+    return to;
 }
 
 BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock,
diff --git a/deps/openssl/openssl/crypto/bn/bn_mul.c b/deps/openssl/openssl/crypto/bn/bn_mul.c
index 237d7df106..5eda65cfbb 100644
--- a/deps/openssl/openssl/crypto/bn/bn_mul.c
+++ b/deps/openssl/openssl/crypto/bn/bn_mul.c
@@ -154,170 +154,6 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r,
 }
 #endif
 
-BN_ULONG bn_add_part_words(BN_ULONG *r,
-                           const BN_ULONG *a, const BN_ULONG *b,
-                           int cl, int dl)
-{
-    BN_ULONG c, l, t;
-
-    assert(cl >= 0);
-    c = bn_add_words(r, a, b, cl);
-
-    if (dl == 0)
-        return c;
-
-    r += cl;
-    a += cl;
-    b += cl;
-
-    if (dl < 0) {
-        int save_dl = dl;
-        while (c) {
-            l = (c + b[0]) & BN_MASK2;
-            c = (l < c);
-            r[0] = l;
-            if (++dl >= 0)
-                break;
-
-            l = (c + b[1]) & BN_MASK2;
-            c = (l < c);
-            r[1] = l;
-            if (++dl >= 0)
-                break;
-
-            l = (c + b[2]) & BN_MASK2;
-            c = (l < c);
-            r[2] = l;
-            if (++dl >= 0)
-                break;
-
-            l = (c + b[3]) & BN_MASK2;
-            c = (l < c);
-            r[3] = l;
-            if (++dl >= 0)
-                break;
-
-            save_dl = dl;
-            b += 4;
-            r += 4;
-        }
-        if (dl < 0) {
-            if (save_dl < dl) {
-                switch (dl - save_dl) {
-                case 1:
-                    r[1] = b[1];
-                    if (++dl >= 0)
-                        break;
-                    /* fall thru */
-                case 2:
-                    r[2] = b[2];
-                    if (++dl >= 0)
-                        break;
-                    /* fall thru */
-                case 3:
-                    r[3] = b[3];
-                    if (++dl >= 0)
-                        break;
-                }
-                b += 4;
-                r += 4;
-            }
-        }
-        if (dl < 0) {
-            for (;;) {
-                r[0] = b[0];
-                if (++dl >= 0)
-                    break;
-                r[1] = b[1];
-                if (++dl >= 0)
-                    break;
-                r[2] = b[2];
-                if (++dl >= 0)
-                    break;
-                r[3] = b[3];
-                if (++dl >= 0)
-                    break;
-
-                b += 4;
-                r += 4;
-            }
-        }
-    } else {
-        int save_dl = dl;
-        while (c) {
-            t = (a[0] + c) & BN_MASK2;
-            c = (t < c);
-            r[0] = t;
-            if (--dl <= 0)
-                break;
-
-            t = (a[1] + c) & BN_MASK2;
-            c = (t < c);
-            r[1] = t;
-            if (--dl <= 0)
-                break;
-
-            t = (a[2] + c) & BN_MASK2;
-            c = (t < c);
-            r[2] = t;
-            if (--dl <= 0)
-                break;
-
-            t = (a[3] + c) & BN_MASK2;
-            c = (t < c);
-            r[3] = t;
-            if (--dl <= 0)
-                break;
-
-            save_dl = dl;
-            a += 4;
-            r += 4;
-        }
-        if (dl > 0) {
-            if (save_dl > dl) {
-                switch (save_dl - dl) {
-                case 1:
-                    r[1] = a[1];
-                    if (--dl <= 0)
-                        break;
-                    /* fall thru */
-                case 2:
-                    r[2] = a[2];
-                    if (--dl <= 0)
-                        break;
-                    /* fall thru */
-                case 3:
-                    r[3] = a[3];
-                    if (--dl <= 0)
-                        break;
-                }
-                a += 4;
-                r += 4;
-            }
-        }
-        if (dl > 0) {
-            for (;;) {
-                r[0] = a[0];
-                if (--dl <= 0)
-                    break;
-                r[1] = a[1];
-                if (--dl <= 0)
-                    break;
-                r[2] = a[2];
-                if (--dl <= 0)
-                    break;
-                r[3] = a[3];
-                if (--dl <= 0)
-                    break;
-
-                a += 4;
-                r += 4;
-            }
-        }
-    }
-    return c;
-}
-
 #ifdef BN_RECURSION
 /*
  * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of
@@ -505,7 +341,6 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
         break;
     case -3:
-        /* break; */
     case -2:
         bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
@@ -514,14 +349,12 @@ void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
     case -1:
     case 0:
     case 1:
-        /* break; */
     case 2:
         bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
         neg = 1;
         break;
     case 3:
-        /* break; */
     case 4:
         bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
@@ -659,176 +492,6 @@ void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
         bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
     }
 }
-
-/*-
- * a and b must be the same size, which is n2.
- * r needs to be n2 words and t needs to be n2*2
- * l is the low words of the output.
- * t needs to be n2*3
- */
-void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
-                 BN_ULONG *t)
-{
-    int i, n;
-    int c1, c2;
-    int neg, oneg, zero;
-    BN_ULONG ll, lc, *lp, *mp;
-
-    n = n2 / 2;
-
-    /* Calculate (al-ah)*(bh-bl) */
-    neg = zero = 0;
-    c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
-    c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
-    switch (c1 * 3 + c2) {
-    case -4:
-        bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
-        bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
-        break;
-    case -3:
-        zero = 1;
-        break;
-    case -2:
-        bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
-        bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
-        neg = 1;
-        break;
-    case -1:
-    case 0:
-    case 1:
-        zero = 1;
-        break;
-    case 2:
-        bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
-        bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
-        neg = 1;
-        break;
-    case 3:
-        zero = 1;
-        break;
-    case 4:
-        bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
-        bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
-        break;
-    }
-
-    oneg = neg;
-    /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
-    /* r[10] = (a[1]*b[1]) */
-# ifdef BN_MUL_COMBA
-    if (n == 8) {
-        bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
-        bn_mul_comba8(r, &(a[n]), &(b[n]));
-    } else
-# endif
-    {
-        bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
-        bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
-    }
-
-    /*-
-     * s0 == low(al*bl)
-     * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
-     * We know s0 and s1 so the only unknown is high(al*bl)
-     * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
-     * high(al*bl) == s1 - (r[0]+l[0]+t[0])
-     */
-    if (l != NULL) {
-        lp = &(t[n2 + n]);
-        bn_add_words(lp, &(r[0]), &(l[0]), n);
-    } else {
-        lp = &(r[0]);
-    }
-
-    if (neg)
-        neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
-    else {
-        bn_add_words(&(t[n2]), lp, &(t[0]), n);
-        neg = 0;
-    }
-
-    if (l != NULL) {
-        bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
-    } else {
-        lp = &(t[n2 + n]);
-        mp = &(t[n2]);
-        for (i = 0; i < n; i++)
-            lp[i] = ((~mp[i]) + 1) & BN_MASK2;
-    }
-
-    /*-
-     * s[0] = low(al*bl)
-     * t[3] = high(al*bl)
-     * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
-     * r[10] = (a[1]*b[1])
-     */
-    /*-
-     * R[10] = al*bl
-     * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
-     * R[32] = ah*bh
-     */
-    /*-
-     * R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
-     * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
-     * R[3]=r[1]+(carry/borrow)
-     */
-    if (l != NULL) {
-        lp = &(t[n2]);
-        c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
-    } else {
-        lp = &(t[n2 + n]);
-        c1 = 0;
-    }
-    c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
-    if (oneg)
-        c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
-    else
-        c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
-
-    c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
-    c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
-    if (oneg)
-        c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
-    else
-        c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
-
-    if (c1 != 0) {              /* Add starting at r[0], could be +ve or -ve */
-        i = 0;
-        if (c1 > 0) {
-            lc = c1;
-            do {
-                ll = (r[i] + lc) & BN_MASK2;
-                r[i++] = ll;
-                lc = (lc > ll);
-            } while (lc);
-        } else {
-            lc = -c1;
-            do {
-                ll = r[i];
-                r[i++] = (ll - lc) & BN_MASK2;
-                lc = (lc > ll);
-            } while (lc);
-        }
-    }
-    if (c2 != 0) {              /* Add starting at r[1] */
-        i = n;
-        if (c2 > 0) {
-            lc = c2;
-            do {
-                ll = (r[i] + lc) & BN_MASK2;
-                r[i++] = ll;
-                lc = (lc > ll);
-            } while (lc);
-        } else {
-            lc = -c2;
-            do {
-                ll = r[i];
-                r[i++] = (ll - lc) & BN_MASK2;
-                lc = (lc > ll);
-            } while (lc);
-        }
-    }
-}
 #endif                          /* BN_RECURSION */
 
 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
@@ -863,7 +526,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
 
     if ((al == 0) || (bl == 0)) {
         BN_zero(r);
-        return (1);
+        return 1;
     }
     top = al + bl;
 
@@ -953,7 +616,7 @@ int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
  err:
     bn_check_top(r);
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
diff --git a/deps/openssl/openssl/crypto/bn/bn_nist.c b/deps/openssl/openssl/crypto/bn/bn_nist.c
index 53598f97ef..dcdd321c66 100644
--- a/deps/openssl/openssl/crypto/bn/bn_nist.c
+++ b/deps/openssl/openssl/crypto/bn/bn_nist.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -254,7 +254,7 @@ static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
     int i;
 
 #ifdef BN_DEBUG
-    OPENSSL_assert(top <= max);
+    (void)ossl_assert(top <= max);
 #endif
     for (i = 0; i < top; i++)
         dst[i] = src[i];
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.c b/deps/openssl/openssl/crypto/bn/bn_prime.c
index 616389cfa6..b91b31b1f3 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.c
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.c
@@ -1,7 +1,5 @@
 /*
- * WARNING: do not edit!
- * Generated by crypto/bn/bn_prime.pl
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -29,53 +27,6 @@ static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
                                   const BIGNUM *add, const BIGNUM *rem,
                                   BN_CTX *ctx);
 
-static const int prime_offsets[480] = {
-    13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
-    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
-    167, 169, 173, 179, 181, 191, 193, 197, 199, 211, 221, 223, 227, 229,
-    233, 239, 241, 247, 251, 257, 263, 269, 271, 277, 281, 283, 289, 293,
-    299, 307, 311, 313, 317, 323, 331, 337, 347, 349, 353, 359, 361, 367,
-    373, 377, 379, 383, 389, 391, 397, 401, 403, 409, 419, 421, 431, 433,
-    437, 439, 443, 449, 457, 461, 463, 467, 479, 481, 487, 491, 493, 499,
-    503, 509, 521, 523, 527, 529, 533, 541, 547, 551, 557, 559, 563, 569,
-    571, 577, 587, 589, 593, 599, 601, 607, 611, 613, 617, 619, 629, 631,
-    641, 643, 647, 653, 659, 661, 667, 673, 677, 683, 689, 691, 697, 701,
-    703, 709, 713, 719, 727, 731, 733, 739, 743, 751, 757, 761, 767, 769,
-    773, 779, 787, 793, 797, 799, 809, 811, 817, 821, 823, 827, 829, 839,
-    841, 851, 853, 857, 859, 863, 871, 877, 881, 883, 887, 893, 899, 901,
-    907, 911, 919, 923, 929, 937, 941, 943, 947, 949, 953, 961, 967, 971,
-    977, 983, 989, 991, 997, 1003, 1007, 1009, 1013, 1019, 1021, 1027, 1031,
-    1033, 1037, 1039, 1049, 1051, 1061, 1063, 1069, 1073, 1079, 1081, 1087,
-    1091, 1093, 1097, 1103, 1109, 1117, 1121, 1123, 1129, 1139, 1147, 1151,
-    1153, 1157, 1159, 1163, 1171, 1181, 1187, 1189, 1193, 1201, 1207, 1213,
-    1217, 1219, 1223, 1229, 1231, 1237, 1241, 1247, 1249, 1259, 1261, 1271,
-    1273, 1277, 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, 1313, 1319,
-    1321, 1327, 1333, 1339, 1343, 1349, 1357, 1361, 1363, 1367, 1369, 1373,
-    1381, 1387, 1391, 1399, 1403, 1409, 1411, 1417, 1423, 1427, 1429, 1433,
-    1439, 1447, 1451, 1453, 1457, 1459, 1469, 1471, 1481, 1483, 1487, 1489,
-    1493, 1499, 1501, 1511, 1513, 1517, 1523, 1531, 1537, 1541, 1543, 1549,
-    1553, 1559, 1567, 1571, 1577, 1579, 1583, 1591, 1597, 1601, 1607, 1609,
-    1613, 1619, 1621, 1627, 1633, 1637, 1643, 1649, 1651, 1657, 1663, 1667,
-    1669, 1679, 1681, 1691, 1693, 1697, 1699, 1703, 1709, 1711, 1717, 1721,
-    1723, 1733, 1739, 1741, 1747, 1751, 1753, 1759, 1763, 1769, 1777, 1781,
-    1783, 1787, 1789, 1801, 1807, 1811, 1817, 1819, 1823, 1829, 1831, 1843,
-    1847, 1849, 1853, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1891, 1901,
-    1907, 1909, 1913, 1919, 1921, 1927, 1931, 1933, 1937, 1943, 1949, 1951,
-    1957, 1961, 1963, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017,
-    2021, 2027, 2029, 2033, 2039, 2041, 2047, 2053, 2059, 2063, 2069, 2071,
-    2077, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2117, 2119, 2129, 2131,
-    2137, 2141, 2143, 2147, 2153, 2159, 2161, 2171, 2173, 2179, 2183, 2197,
-    2201, 2203, 2207, 2209, 2213, 2221, 2227, 2231, 2237, 2239, 2243, 2249,
-    2251, 2257, 2263, 2267, 2269, 2273, 2279, 2281, 2287, 2291, 2293, 2297,
-    2309, 2311
-};
-
-static const int prime_offset_count = 480;
-static const int prime_multiplier = 2310;
-static const int prime_multiplier_bits = 11; /* 2^|prime_multiplier_bits| <=
-                                              * |prime_multiplier| */
-static const int first_prime_index = 5;
-
 int BN_GENCB_call(BN_GENCB *cb, int a, int b)
 {
     /* No callback means continue */
@@ -127,7 +78,7 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
         goto err;
     BN_CTX_start(ctx);
     t = BN_CTX_get(ctx);
-    if (!t)
+    if (t == NULL)
         goto err;
  loop:
     /* make a random number and set the top and bottom bits */
@@ -203,26 +154,28 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
     int i, j, ret = -1;
     int k;
     BN_CTX *ctx = NULL;
-    BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
+    BIGNUM *A1, *A1_odd, *A3, *check; /* taken from ctx */
     BN_MONT_CTX *mont = NULL;
 
-    if (BN_cmp(a, BN_value_one()) <= 0)
+    /* Take care of the really small primes 2 & 3 */
+    if (BN_is_word(a, 2) || BN_is_word(a, 3))
+        return 1;
+
+    /* Check odd and bigger than 1 */
+    if (!BN_is_odd(a) || BN_cmp(a, BN_value_one()) <= 0)
         return 0;
 
     if (checks == BN_prime_checks)
         checks = BN_prime_checks_for_size(BN_num_bits(a));
 
     /* first look for small factors */
-    if (!BN_is_odd(a))
-        /* a is even => a is prime if and only if a == 2 */
-        return BN_is_word(a, 2);
     if (do_trial_division) {
         for (i = 1; i < NUMPRIMES; i++) {
             BN_ULONG mod = BN_mod_word(a, primes[i]);
             if (mod == (BN_ULONG)-1)
                 goto err;
             if (mod == 0)
-                return 0;
+                return BN_is_word(a, primes[i]);
         }
         if (!BN_GENCB_call(cb, 1, -1))
             goto err;
@@ -235,20 +188,18 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
     BN_CTX_start(ctx);
 
     A1 = BN_CTX_get(ctx);
+    A3 = BN_CTX_get(ctx);
     A1_odd = BN_CTX_get(ctx);
     check = BN_CTX_get(ctx);
     if (check == NULL)
         goto err;
 
     /* compute A1 := a - 1 */
-    if (!BN_copy(A1, a))
+    if (!BN_copy(A1, a) || !BN_sub_word(A1, 1))
         goto err;
-    if (!BN_sub_word(A1, 1))
+    /* compute A3 := a - 3 */
+    if (!BN_copy(A3, a) || !BN_sub_word(A3, 3))
         goto err;
-    if (BN_is_zero(A1)) {
-        ret = 0;
-        goto err;
-    }
 
     /* write  A1  as  A1_odd * 2^k */
     k = 1;
@@ -265,11 +216,9 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
         goto err;
 
     for (i = 0; i < checks; i++) {
-        if (!BN_pseudo_rand_range(check, A1))
+        /* 1 < check < a-1 */
+        if (!BN_priv_rand_range(check, A3) || !BN_add_word(check, 2))
             goto err;
-        if (!BN_add_word(check, 1))
-            goto err;
-        /* now 1 <= check < a */
 
         j = witness(check, a, A1, A1_odd, k, ctx, mont);
         if (j == -1)
@@ -290,83 +239,6 @@ int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
     }
     BN_MONT_CTX_free(mont);
 
-    return (ret);
-}
-
-int bn_probable_prime_dh_retry(BIGNUM *rnd, int bits, BN_CTX *ctx)
-{
-    int i;
-    int ret = 0;
-
- loop:
-    if (!BN_rand(rnd, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
-        goto err;
-
-    /* we now have a random number 'rand' to test. */
-
-    for (i = 1; i < NUMPRIMES; i++) {
-        /* check that rnd is a prime */
-        BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
-        if (mod == (BN_ULONG)-1)
-            goto err;
-        if (mod <= 1) {
-            goto loop;
-        }
-    }
-    ret = 1;
-
- err:
-    bn_check_top(rnd);
-    return (ret);
-}
-
-int bn_probable_prime_dh_coprime(BIGNUM *rnd, int bits, BN_CTX *ctx)
-{
-    int i;
-    BIGNUM *offset_index;
-    BIGNUM *offset_count;
-    int ret = 0;
-
-    OPENSSL_assert(bits > prime_multiplier_bits);
-
-    BN_CTX_start(ctx);
-    if ((offset_index = BN_CTX_get(ctx)) == NULL)
-        goto err;
-    if ((offset_count = BN_CTX_get(ctx)) == NULL)
-        goto err;
-
-    if (!BN_add_word(offset_count, prime_offset_count))
-        goto err;
-
- loop:
-    if (!BN_rand(rnd, bits - prime_multiplier_bits,
-                 BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD))
-        goto err;
-    if (BN_is_bit_set(rnd, bits))
-        goto loop;
-    if (!BN_rand_range(offset_index, offset_count))
-        goto err;
-
-    if (!BN_mul_word(rnd, prime_multiplier)
-        || !BN_add_word(rnd, prime_offsets[BN_get_word(offset_index)]))
-        goto err;
-
-    /* we now have a random number 'rand' to test. */
-
-    /* skip coprimes */
-    for (i = first_prime_index; i < NUMPRIMES; i++) {
-        /* check that rnd is a prime */
-        BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
-        if (mod == (BN_ULONG)-1)
-            goto err;
-        if (mod <= 1)
-            goto loop;
-    }
-    ret = 1;
-
- err:
-    BN_CTX_end(ctx);
-    bn_check_top(rnd);
     return ret;
 }
 
@@ -405,8 +277,9 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
     char is_single_word = bits <= BN_BITS2;
 
  again:
-    if (!BN_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
-        return (0);
+    /* TODO: Not all primes are private */
+    if (!BN_priv_rand(rnd, bits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD))
+        return 0;
     /* we now have a random number 'rnd' to test. */
     for (i = 1; i < NUMPRIMES; i++) {
         BN_ULONG mod = BN_mod_word(rnd, (BN_ULONG)primes[i]);
@@ -472,11 +345,11 @@ static int probable_prime(BIGNUM *rnd, int bits, prime_t *mods)
         }
     }
     if (!BN_add_word(rnd, delta))
-        return (0);
+        return 0;
     if (BN_num_bits(rnd) != bits)
         goto again;
     bn_check_top(rnd);
-    return (1);
+    return 1;
 }
 
 int bn_probable_prime_dh(BIGNUM *rnd, int bits,
@@ -525,7 +398,7 @@ int bn_probable_prime_dh(BIGNUM *rnd, int bits,
  err:
     BN_CTX_end(ctx);
     bn_check_top(rnd);
-    return (ret);
+    return ret;
 }
 
 static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
@@ -592,5 +465,5 @@ static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
  err:
     BN_CTX_end(ctx);
     bn_check_top(p);
-    return (ret);
+    return ret;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.h b/deps/openssl/openssl/crypto/bn/bn_prime.h
index 41440fa4e1..a64c9630f3 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.h
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/bn/bn_prime.pl
  *
- * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -14,261 +14,260 @@ typedef unsigned short prime_t;
 # define NUMPRIMES 2048
 
 static const prime_t primes[2048] = {
-
-       2,    3,    5,    7,   11,   13,   17,   19, 
-      23,   29,   31,   37,   41,   43,   47,   53, 
-      59,   61,   67,   71,   73,   79,   83,   89, 
-      97,  101,  103,  107,  109,  113,  127,  131, 
-     137,  139,  149,  151,  157,  163,  167,  173, 
-     179,  181,  191,  193,  197,  199,  211,  223, 
-     227,  229,  233,  239,  241,  251,  257,  263, 
-     269,  271,  277,  281,  283,  293,  307,  311, 
-     313,  317,  331,  337,  347,  349,  353,  359, 
-     367,  373,  379,  383,  389,  397,  401,  409, 
-     419,  421,  431,  433,  439,  443,  449,  457, 
-     461,  463,  467,  479,  487,  491,  499,  503, 
-     509,  521,  523,  541,  547,  557,  563,  569, 
-     571,  577,  587,  593,  599,  601,  607,  613, 
-     617,  619,  631,  641,  643,  647,  653,  659, 
-     661,  673,  677,  683,  691,  701,  709,  719, 
-     727,  733,  739,  743,  751,  757,  761,  769, 
-     773,  787,  797,  809,  811,  821,  823,  827, 
-     829,  839,  853,  857,  859,  863,  877,  881, 
-     883,  887,  907,  911,  919,  929,  937,  941, 
-     947,  953,  967,  971,  977,  983,  991,  997, 
-    1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 
-    1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 
-    1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, 
-    1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, 
-    1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 
-    1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 
-    1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, 
-    1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 
-    1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, 
-    1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 
-    1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 
-    1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 
-    1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, 
-    1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, 
-    1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 
-    1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 
-    1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 
-    2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, 
-    2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, 
-    2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 
-    2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 
-    2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 
-    2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, 
-    2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, 
-    2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 
-    2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 
-    2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, 
-    2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, 
-    2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, 
-    2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 
-    2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 
-    2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 
-    2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, 
-    3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 
-    3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 
-    3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 
-    3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, 
-    3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 
-    3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, 
-    3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 
-    3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 
-    3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 
-    3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671, 
-    3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 
-    3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 
-    3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 
-    3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, 
-    3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, 
-    4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, 
-    4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 
-    4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 
-    4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, 
-    4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, 
-    4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, 
-    4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 
-    4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 
-    4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, 
-    4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, 
-    4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, 
-    4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 
-    4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, 
-    4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, 
-    4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, 
-    5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, 
-    5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 
-    5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 
-    5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, 
-    5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, 
-    5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, 
-    5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 
-    5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 
-    5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, 
-    5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, 
-    5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, 
-    5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 
-    5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 
-    5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, 
-    6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, 
-    6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, 
-    6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 
-    6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, 
-    6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, 
-    6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, 
-    6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, 
-    6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 
-    6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 
-    6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, 
-    6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, 
-    6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, 
-    6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 
-    6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, 
-    6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, 
-    7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, 
-    7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, 
-    7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 
-    7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 
-    7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, 
-    7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, 
-    7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, 
-    7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 
-    7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, 
-    7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, 
-    7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, 
-    7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, 
-    7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009, 
-    8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 
-    8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161, 
-    8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, 
-    8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291, 
-    8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 
-    8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443, 
-    8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, 
-    8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609, 
-    8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, 
-    8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731, 
-    8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, 
-    8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861, 
-    8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, 
-    8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011, 
-    9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 
-    9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161, 
-    9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, 
-    9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311, 
-    9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, 
-    9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433, 
-    9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 
-    9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587, 
-    9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, 
-    9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733, 
-    9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 
-    9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857, 
-    9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, 
-    9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037, 
-    10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, 
-    10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163, 
-    10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, 
-    10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, 
-    10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, 
-    10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459, 
-    10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 
-    10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, 
-    10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, 
-    10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771, 
-    10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, 
-    10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 
-    10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, 
-    11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087, 
-    11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, 
-    11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251, 
-    11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 
-    11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399, 
-    11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, 
-    11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551, 
-    11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, 
-    11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731, 
-    11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, 
-    11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887, 
-    11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, 
-    11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, 
-    12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 
-    12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161, 
-    12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, 
-    12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, 
-    12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, 
-    12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473, 
-    12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, 
-    12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, 
-    12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, 
-    12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739, 
-    12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 
-    12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907, 
-    12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, 
-    12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033, 
-    13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, 
-    13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177, 
-    13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, 
-    13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337, 
-    13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, 
-    13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499, 
-    13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 
-    13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, 
-    13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, 
-    13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799, 
-    13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, 
-    13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 
-    13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, 
-    14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143, 
-    14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, 
-    14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, 
-    14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 
-    14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461, 
-    14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, 
-    14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627, 
-    14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, 
-    14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753, 
-    14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, 
-    14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887, 
-    14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, 
-    14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073, 
-    15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 
-    15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217, 
-    15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, 
-    15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, 
-    15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, 
-    15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473, 
-    15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, 
-    15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, 
-    15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, 
-    15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773, 
-    15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 
-    15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, 
-    15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, 
-    16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087, 
-    16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, 
-    16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249, 
-    16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, 
-    16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427, 
-    16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, 
-    16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603, 
-    16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 
-    16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747, 
-    16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, 
-    16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927, 
-    16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, 
-    17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 
-    17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, 
-    17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231, 
-    17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, 
-    17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, 
-    17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 
-    17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519, 
-    17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, 
-    17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, 
-    17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, 
-    17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863, 
+        2,     3,     5,     7,    11,    13,    17,    19,
+       23,    29,    31,    37,    41,    43,    47,    53,
+       59,    61,    67,    71,    73,    79,    83,    89,
+       97,   101,   103,   107,   109,   113,   127,   131,
+      137,   139,   149,   151,   157,   163,   167,   173,
+      179,   181,   191,   193,   197,   199,   211,   223,
+      227,   229,   233,   239,   241,   251,   257,   263,
+      269,   271,   277,   281,   283,   293,   307,   311,
+      313,   317,   331,   337,   347,   349,   353,   359,
+      367,   373,   379,   383,   389,   397,   401,   409,
+      419,   421,   431,   433,   439,   443,   449,   457,
+      461,   463,   467,   479,   487,   491,   499,   503,
+      509,   521,   523,   541,   547,   557,   563,   569,
+      571,   577,   587,   593,   599,   601,   607,   613,
+      617,   619,   631,   641,   643,   647,   653,   659,
+      661,   673,   677,   683,   691,   701,   709,   719,
+      727,   733,   739,   743,   751,   757,   761,   769,
+      773,   787,   797,   809,   811,   821,   823,   827,
+      829,   839,   853,   857,   859,   863,   877,   881,
+      883,   887,   907,   911,   919,   929,   937,   941,
+      947,   953,   967,   971,   977,   983,   991,   997,
+     1009,  1013,  1019,  1021,  1031,  1033,  1039,  1049,
+     1051,  1061,  1063,  1069,  1087,  1091,  1093,  1097,
+     1103,  1109,  1117,  1123,  1129,  1151,  1153,  1163,
+     1171,  1181,  1187,  1193,  1201,  1213,  1217,  1223,
+     1229,  1231,  1237,  1249,  1259,  1277,  1279,  1283,
+     1289,  1291,  1297,  1301,  1303,  1307,  1319,  1321,
+     1327,  1361,  1367,  1373,  1381,  1399,  1409,  1423,
+     1427,  1429,  1433,  1439,  1447,  1451,  1453,  1459,
+     1471,  1481,  1483,  1487,  1489,  1493,  1499,  1511,
+     1523,  1531,  1543,  1549,  1553,  1559,  1567,  1571,
+     1579,  1583,  1597,  1601,  1607,  1609,  1613,  1619,
+     1621,  1627,  1637,  1657,  1663,  1667,  1669,  1693,
+     1697,  1699,  1709,  1721,  1723,  1733,  1741,  1747,
+     1753,  1759,  1777,  1783,  1787,  1789,  1801,  1811,
+     1823,  1831,  1847,  1861,  1867,  1871,  1873,  1877,
+     1879,  1889,  1901,  1907,  1913,  1931,  1933,  1949,
+     1951,  1973,  1979,  1987,  1993,  1997,  1999,  2003,
+     2011,  2017,  2027,  2029,  2039,  2053,  2063,  2069,
+     2081,  2083,  2087,  2089,  2099,  2111,  2113,  2129,
+     2131,  2137,  2141,  2143,  2153,  2161,  2179,  2203,
+     2207,  2213,  2221,  2237,  2239,  2243,  2251,  2267,
+     2269,  2273,  2281,  2287,  2293,  2297,  2309,  2311,
+     2333,  2339,  2341,  2347,  2351,  2357,  2371,  2377,
+     2381,  2383,  2389,  2393,  2399,  2411,  2417,  2423,
+     2437,  2441,  2447,  2459,  2467,  2473,  2477,  2503,
+     2521,  2531,  2539,  2543,  2549,  2551,  2557,  2579,
+     2591,  2593,  2609,  2617,  2621,  2633,  2647,  2657,
+     2659,  2663,  2671,  2677,  2683,  2687,  2689,  2693,
+     2699,  2707,  2711,  2713,  2719,  2729,  2731,  2741,
+     2749,  2753,  2767,  2777,  2789,  2791,  2797,  2801,
+     2803,  2819,  2833,  2837,  2843,  2851,  2857,  2861,
+     2879,  2887,  2897,  2903,  2909,  2917,  2927,  2939,
+     2953,  2957,  2963,  2969,  2971,  2999,  3001,  3011,
+     3019,  3023,  3037,  3041,  3049,  3061,  3067,  3079,
+     3083,  3089,  3109,  3119,  3121,  3137,  3163,  3167,
+     3169,  3181,  3187,  3191,  3203,  3209,  3217,  3221,
+     3229,  3251,  3253,  3257,  3259,  3271,  3299,  3301,
+     3307,  3313,  3319,  3323,  3329,  3331,  3343,  3347,
+     3359,  3361,  3371,  3373,  3389,  3391,  3407,  3413,
+     3433,  3449,  3457,  3461,  3463,  3467,  3469,  3491,
+     3499,  3511,  3517,  3527,  3529,  3533,  3539,  3541,
+     3547,  3557,  3559,  3571,  3581,  3583,  3593,  3607,
+     3613,  3617,  3623,  3631,  3637,  3643,  3659,  3671,
+     3673,  3677,  3691,  3697,  3701,  3709,  3719,  3727,
+     3733,  3739,  3761,  3767,  3769,  3779,  3793,  3797,
+     3803,  3821,  3823,  3833,  3847,  3851,  3853,  3863,
+     3877,  3881,  3889,  3907,  3911,  3917,  3919,  3923,
+     3929,  3931,  3943,  3947,  3967,  3989,  4001,  4003,
+     4007,  4013,  4019,  4021,  4027,  4049,  4051,  4057,
+     4073,  4079,  4091,  4093,  4099,  4111,  4127,  4129,
+     4133,  4139,  4153,  4157,  4159,  4177,  4201,  4211,
+     4217,  4219,  4229,  4231,  4241,  4243,  4253,  4259,
+     4261,  4271,  4273,  4283,  4289,  4297,  4327,  4337,
+     4339,  4349,  4357,  4363,  4373,  4391,  4397,  4409,
+     4421,  4423,  4441,  4447,  4451,  4457,  4463,  4481,
+     4483,  4493,  4507,  4513,  4517,  4519,  4523,  4547,
+     4549,  4561,  4567,  4583,  4591,  4597,  4603,  4621,
+     4637,  4639,  4643,  4649,  4651,  4657,  4663,  4673,
+     4679,  4691,  4703,  4721,  4723,  4729,  4733,  4751,
+     4759,  4783,  4787,  4789,  4793,  4799,  4801,  4813,
+     4817,  4831,  4861,  4871,  4877,  4889,  4903,  4909,
+     4919,  4931,  4933,  4937,  4943,  4951,  4957,  4967,
+     4969,  4973,  4987,  4993,  4999,  5003,  5009,  5011,
+     5021,  5023,  5039,  5051,  5059,  5077,  5081,  5087,
+     5099,  5101,  5107,  5113,  5119,  5147,  5153,  5167,
+     5171,  5179,  5189,  5197,  5209,  5227,  5231,  5233,
+     5237,  5261,  5273,  5279,  5281,  5297,  5303,  5309,
+     5323,  5333,  5347,  5351,  5381,  5387,  5393,  5399,
+     5407,  5413,  5417,  5419,  5431,  5437,  5441,  5443,
+     5449,  5471,  5477,  5479,  5483,  5501,  5503,  5507,
+     5519,  5521,  5527,  5531,  5557,  5563,  5569,  5573,
+     5581,  5591,  5623,  5639,  5641,  5647,  5651,  5653,
+     5657,  5659,  5669,  5683,  5689,  5693,  5701,  5711,
+     5717,  5737,  5741,  5743,  5749,  5779,  5783,  5791,
+     5801,  5807,  5813,  5821,  5827,  5839,  5843,  5849,
+     5851,  5857,  5861,  5867,  5869,  5879,  5881,  5897,
+     5903,  5923,  5927,  5939,  5953,  5981,  5987,  6007,
+     6011,  6029,  6037,  6043,  6047,  6053,  6067,  6073,
+     6079,  6089,  6091,  6101,  6113,  6121,  6131,  6133,
+     6143,  6151,  6163,  6173,  6197,  6199,  6203,  6211,
+     6217,  6221,  6229,  6247,  6257,  6263,  6269,  6271,
+     6277,  6287,  6299,  6301,  6311,  6317,  6323,  6329,
+     6337,  6343,  6353,  6359,  6361,  6367,  6373,  6379,
+     6389,  6397,  6421,  6427,  6449,  6451,  6469,  6473,
+     6481,  6491,  6521,  6529,  6547,  6551,  6553,  6563,
+     6569,  6571,  6577,  6581,  6599,  6607,  6619,  6637,
+     6653,  6659,  6661,  6673,  6679,  6689,  6691,  6701,
+     6703,  6709,  6719,  6733,  6737,  6761,  6763,  6779,
+     6781,  6791,  6793,  6803,  6823,  6827,  6829,  6833,
+     6841,  6857,  6863,  6869,  6871,  6883,  6899,  6907,
+     6911,  6917,  6947,  6949,  6959,  6961,  6967,  6971,
+     6977,  6983,  6991,  6997,  7001,  7013,  7019,  7027,
+     7039,  7043,  7057,  7069,  7079,  7103,  7109,  7121,
+     7127,  7129,  7151,  7159,  7177,  7187,  7193,  7207,
+     7211,  7213,  7219,  7229,  7237,  7243,  7247,  7253,
+     7283,  7297,  7307,  7309,  7321,  7331,  7333,  7349,
+     7351,  7369,  7393,  7411,  7417,  7433,  7451,  7457,
+     7459,  7477,  7481,  7487,  7489,  7499,  7507,  7517,
+     7523,  7529,  7537,  7541,  7547,  7549,  7559,  7561,
+     7573,  7577,  7583,  7589,  7591,  7603,  7607,  7621,
+     7639,  7643,  7649,  7669,  7673,  7681,  7687,  7691,
+     7699,  7703,  7717,  7723,  7727,  7741,  7753,  7757,
+     7759,  7789,  7793,  7817,  7823,  7829,  7841,  7853,
+     7867,  7873,  7877,  7879,  7883,  7901,  7907,  7919,
+     7927,  7933,  7937,  7949,  7951,  7963,  7993,  8009,
+     8011,  8017,  8039,  8053,  8059,  8069,  8081,  8087,
+     8089,  8093,  8101,  8111,  8117,  8123,  8147,  8161,
+     8167,  8171,  8179,  8191,  8209,  8219,  8221,  8231,
+     8233,  8237,  8243,  8263,  8269,  8273,  8287,  8291,
+     8293,  8297,  8311,  8317,  8329,  8353,  8363,  8369,
+     8377,  8387,  8389,  8419,  8423,  8429,  8431,  8443,
+     8447,  8461,  8467,  8501,  8513,  8521,  8527,  8537,
+     8539,  8543,  8563,  8573,  8581,  8597,  8599,  8609,
+     8623,  8627,  8629,  8641,  8647,  8663,  8669,  8677,
+     8681,  8689,  8693,  8699,  8707,  8713,  8719,  8731,
+     8737,  8741,  8747,  8753,  8761,  8779,  8783,  8803,
+     8807,  8819,  8821,  8831,  8837,  8839,  8849,  8861,
+     8863,  8867,  8887,  8893,  8923,  8929,  8933,  8941,
+     8951,  8963,  8969,  8971,  8999,  9001,  9007,  9011,
+     9013,  9029,  9041,  9043,  9049,  9059,  9067,  9091,
+     9103,  9109,  9127,  9133,  9137,  9151,  9157,  9161,
+     9173,  9181,  9187,  9199,  9203,  9209,  9221,  9227,
+     9239,  9241,  9257,  9277,  9281,  9283,  9293,  9311,
+     9319,  9323,  9337,  9341,  9343,  9349,  9371,  9377,
+     9391,  9397,  9403,  9413,  9419,  9421,  9431,  9433,
+     9437,  9439,  9461,  9463,  9467,  9473,  9479,  9491,
+     9497,  9511,  9521,  9533,  9539,  9547,  9551,  9587,
+     9601,  9613,  9619,  9623,  9629,  9631,  9643,  9649,
+     9661,  9677,  9679,  9689,  9697,  9719,  9721,  9733,
+     9739,  9743,  9749,  9767,  9769,  9781,  9787,  9791,
+     9803,  9811,  9817,  9829,  9833,  9839,  9851,  9857,
+     9859,  9871,  9883,  9887,  9901,  9907,  9923,  9929,
+     9931,  9941,  9949,  9967,  9973, 10007, 10009, 10037,
+    10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
+    10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
+    10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
+    10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
+    10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
+    10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
+    10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
+    10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
+    10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
+    10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
+    10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
+    10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
+    10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
+    11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
+    11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
+    11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
+    11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
+    11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
+    11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
+    11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
+    11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
+    11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
+    11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
+    11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
+    11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
+    11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
+    12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
+    12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
+    12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
+    12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
+    12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
+    12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
+    12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
+    12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
+    12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
+    12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
+    12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
+    12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
+    12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
+    12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
+    13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
+    13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
+    13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
+    13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
+    13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
+    13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
+    13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
+    13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
+    13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
+    13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
+    13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
+    13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
+    13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
+    14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
+    14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
+    14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
+    14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
+    14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
+    14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
+    14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
+    14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
+    14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
+    14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
+    14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
+    14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
+    14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
+    15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
+    15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
+    15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
+    15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
+    15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
+    15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
+    15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
+    15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
+    15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
+    15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
+    15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
+    15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
+    15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
+    16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
+    16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
+    16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
+    16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
+    16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
+    16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
+    16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
+    16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
+    16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
+    16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
+    16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
+    16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
+    17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
+    17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
+    17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
+    17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
+    17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
+    17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
+    17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
+    17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
+    17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
+    17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
+    17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
 };
diff --git a/deps/openssl/openssl/crypto/bn/bn_prime.pl b/deps/openssl/openssl/crypto/bn/bn_prime.pl
index 163d4a9d30..eeca475b93 100644
--- a/deps/openssl/openssl/crypto/bn/bn_prime.pl
+++ b/deps/openssl/openssl/crypto/bn/bn_prime.pl
@@ -1,17 +1,19 @@
 #! /usr/bin/env perl
-# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1998-2018 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
 # in the file LICENSE in the source distribution or at
 # https://www.openssl.org/source/license.html
 
+# Output year depends on the year of the script.
+my $YEAR = [localtime([stat($0)]->[9])]->[5] + 1900;
 print <<"EOF";
 /*
  * WARNING: do not edit!
  * Generated by crypto/bn/bn_prime.pl
  *
- * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-$YEAR The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -38,9 +40,9 @@ loop: while ($#primes < $num-1) {
 print "typedef unsigned short prime_t;\n";
 printf "# define NUMPRIMES %d\n\n", $num;
 
-printf "static const prime_t primes[%d] = {\n", $num;
+printf "static const prime_t primes[%d] = {", $num;
 for (my $i = 0; $i <= $#primes; $i++) {
-    printf "\n    " if ($i % 8) == 0;
-    printf "%4d, ", $primes[$i];
+    printf "\n   " if ($i % 8) == 0;
+    printf " %5d,", $primes[$i];
 }
 print "\n};\n";
diff --git a/deps/openssl/openssl/crypto/bn/bn_print.c b/deps/openssl/openssl/crypto/bn/bn_print.c
index 5ffe2fc9ba..1853269d90 100644
--- a/deps/openssl/openssl/crypto/bn/bn_print.c
+++ b/deps/openssl/openssl/crypto/bn/bn_print.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -8,7 +8,7 @@
  */
 
 #include <stdio.h>
-#include <ctype.h>
+#include "internal/ctype.h"
 #include <limits.h>
 #include "internal/cryptlib.h"
 #include <openssl/buffer.h>
@@ -32,27 +32,27 @@ char *BN_bn2hex(const BIGNUM *a)
     }
     p = buf;
     if (a->neg)
-        *(p++) = '-';
+        *p++ = '-';
     for (i = a->top - 1; i >= 0; i--) {
         for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
             /* strip leading zeros */
-            v = ((int)(a->d[i] >> (long)j)) & 0xff;
-            if (z || (v != 0)) {
-                *(p++) = Hex[v >> 4];
-                *(p++) = Hex[v & 0x0f];
+            v = (int)((a->d[i] >> j) & 0xff);
+            if (z || v != 0) {
+                *p++ = Hex[v >> 4];
+                *p++ = Hex[v & 0x0f];
                 z = 1;
             }
         }
     }
     *p = '\0';
  err:
-    return (buf);
+    return buf;
 }
 
 /* Must 'OPENSSL_free' the returned data */
 char *BN_bn2dec(const BIGNUM *a)
 {
-    int i = 0, num, ok = 0;
+    int i = 0, num, ok = 0, n, tbytes;
     char *buf = NULL;
     char *p;
     BIGNUM *t = NULL;
@@ -67,22 +67,22 @@ char *BN_bn2dec(const BIGNUM *a)
      */
     i = BN_num_bits(a) * 3;
     num = (i / 10 + i / 1000 + 1) + 1;
+    tbytes = num + 3;   /* negative and terminator and one spare? */
     bn_data_num = num / BN_DEC_NUM + 1;
     bn_data = OPENSSL_malloc(bn_data_num * sizeof(BN_ULONG));
-    buf = OPENSSL_malloc(num + 3);
-    if ((buf == NULL) || (bn_data == NULL)) {
+    buf = OPENSSL_malloc(tbytes);
+    if (buf == NULL || bn_data == NULL) {
         BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE);
         goto err;
     }
     if ((t = BN_dup(a)) == NULL)
         goto err;
 
-#define BUF_REMAIN (num+3 - (size_t)(p - buf))
     p = buf;
     lp = bn_data;
     if (BN_is_zero(t)) {
-        *(p++) = '0';
-        *(p++) = '\0';
+        *p++ = '0';
+        *p++ = '\0';
     } else {
         if (BN_is_negative(t))
             *p++ = '-';
@@ -101,14 +101,16 @@ char *BN_bn2dec(const BIGNUM *a)
          * the last one needs truncation. The blocks need to be reversed in
          * order.
          */
-        BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp);
-        while (*p)
-            p++;
+        n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT1, *lp);
+        if (n < 0)
+            goto err;
+        p += n;
         while (lp != bn_data) {
             lp--;
-            BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp);
-            while (*p)
-                p++;
+            n = BIO_snprintf(p, tbytes - (size_t)(p - buf), BN_DEC_FMT2, *lp);
+            if (n < 0)
+                goto err;
+            p += n;
         }
     }
     ok = 1;
@@ -128,28 +130,28 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
     int neg = 0, h, m, i, j, k, c;
     int num;
 
-    if ((a == NULL) || (*a == '\0'))
-        return (0);
+    if (a == NULL || *a == '\0')
+        return 0;
 
     if (*a == '-') {
         neg = 1;
         a++;
     }
 
-    for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++)
+    for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++)
         continue;
 
-    if (i == 0 || i > INT_MAX/4)
+    if (i == 0 || i > INT_MAX / 4)
         goto err;
 
     num = i + neg;
     if (bn == NULL)
-        return (num);
+        return num;
 
     /* a is the start of the hex digits, and it is 'i' long */
     if (*bn == NULL) {
         if ((ret = BN_new()) == NULL)
-            return (0);
+            return 0;
     } else {
         ret = *bn;
         BN_zero(ret);
@@ -163,7 +165,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
     m = 0;
     h = 0;
     while (j > 0) {
-        m = ((BN_BYTES * 2) <= j) ? (BN_BYTES * 2) : j;
+        m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j;
         l = 0;
         for (;;) {
             c = a[j - m];
@@ -177,7 +179,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
                 break;
             }
         }
-        j -= (BN_BYTES * 2);
+        j -= BN_BYTES * 2;
     }
     ret->top = h;
     bn_correct_top(ret);
@@ -187,11 +189,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
     /* Don't set the negative flag if it's zero. */
     if (ret->top != 0)
         ret->neg = neg;
-    return (num);
+    return num;
  err:
     if (*bn == NULL)
         BN_free(ret);
-    return (0);
+    return 0;
 }
 
 int BN_dec2bn(BIGNUM **bn, const char *a)
@@ -201,22 +203,22 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
     int neg = 0, i, j;
     int num;
 
-    if ((a == NULL) || (*a == '\0'))
-        return (0);
+    if (a == NULL || *a == '\0')
+        return 0;
     if (*a == '-') {
         neg = 1;
         a++;
     }
 
-    for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++)
+    for (i = 0; i <= INT_MAX / 4 && ossl_isdigit(a[i]); i++)
         continue;
 
-    if (i == 0 || i > INT_MAX/4)
+    if (i == 0 || i > INT_MAX / 4)
         goto err;
 
     num = i + neg;
     if (bn == NULL)
-        return (num);
+        return num;
 
     /*
      * a is the start of the digits, and it is 'i' long. We chop it into
@@ -224,7 +226,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
      */
     if (*bn == NULL) {
         if ((ret = BN_new()) == NULL)
-            return (0);
+            return 0;
     } else {
         ret = *bn;
         BN_zero(ret);
@@ -234,7 +236,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
     if (bn_expand(ret, i * 4) == NULL)
         goto err;
 
-    j = BN_DEC_NUM - (i % BN_DEC_NUM);
+    j = BN_DEC_NUM - i % BN_DEC_NUM;
     if (j == BN_DEC_NUM)
         j = 0;
     l = 0;
@@ -257,11 +259,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
     /* Don't set the negative flag if it's zero. */
     if (ret->top != 0)
         ret->neg = neg;
-    return (num);
+    return num;
  err:
     if (*bn == NULL)
         BN_free(ret);
-    return (0);
+    return 0;
 }
 
 int BN_asc2bn(BIGNUM **bn, const char *a)
@@ -291,11 +293,11 @@ int BN_print_fp(FILE *fp, const BIGNUM *a)
     int ret;
 
     if ((b = BIO_new(BIO_s_file())) == NULL)
-        return (0);
+        return 0;
     BIO_set_fp(b, fp, BIO_NOCLOSE);
     ret = BN_print(b, a);
     BIO_free(b);
-    return (ret);
+    return ret;
 }
 # endif
 
@@ -304,16 +306,16 @@ int BN_print(BIO *bp, const BIGNUM *a)
     int i, j, v, z = 0;
     int ret = 0;
 
-    if ((a->neg) && (BIO_write(bp, "-", 1) != 1))
+    if ((a->neg) && BIO_write(bp, "-", 1) != 1)
         goto end;
-    if (BN_is_zero(a) && (BIO_write(bp, "0", 1) != 1))
+    if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1)
         goto end;
     for (i = a->top - 1; i >= 0; i--) {
         for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
             /* strip leading zeros */
-            v = ((int)(a->d[i] >> (long)j)) & 0x0f;
-            if (z || (v != 0)) {
-                if (BIO_write(bp, &(Hex[v]), 1) != 1)
+            v = (int)((a->d[i] >> j) & 0x0f);
+            if (z || v != 0) {
+                if (BIO_write(bp, &Hex[v], 1) != 1)
                     goto end;
                 z = 1;
             }
@@ -321,7 +323,7 @@ int BN_print(BIO *bp, const BIGNUM *a)
     }
     ret = 1;
  end:
-    return (ret);
+    return ret;
 }
 
 char *BN_options(void)
@@ -332,12 +334,12 @@ char *BN_options(void)
     if (!init) {
         init++;
 #ifdef BN_LLONG
-        BIO_snprintf(data, sizeof(data), "bn(%d,%d)",
-                     (int)sizeof(BN_ULLONG) * 8, (int)sizeof(BN_ULONG) * 8);
+        BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+                     sizeof(BN_ULLONG) * 8, sizeof(BN_ULONG) * 8);
 #else
-        BIO_snprintf(data, sizeof(data), "bn(%d,%d)",
-                     (int)sizeof(BN_ULONG) * 8, (int)sizeof(BN_ULONG) * 8);
+        BIO_snprintf(data, sizeof(data), "bn(%zu,%zu)",
+                     sizeof(BN_ULONG) * 8, sizeof(BN_ULONG) * 8);
 #endif
     }
-    return (data);
+    return data;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_rand.c b/deps/openssl/openssl/crypto/bn/bn_rand.c
index 9ce4c5f606..c0d1a32292 100644
--- a/deps/openssl/openssl/crypto/bn/bn_rand.c
+++ b/deps/openssl/openssl/crypto/bn/bn_rand.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -14,11 +14,14 @@
 #include <openssl/rand.h>
 #include <openssl/sha.h>
 
-static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
+typedef enum bnrand_flag_e {
+    NORMAL, TESTING, PRIVATE
+} BNRAND_FLAG;
+
+static int bnrand(BNRAND_FLAG flag, BIGNUM *rnd, int bits, int top, int bottom)
 {
     unsigned char *buf = NULL;
-    int ret = 0, bit, bytes, mask;
-    time_t tim;
+    int b, ret = 0, bit, bytes, mask;
 
     if (bits == 0) {
         if (top != BN_RAND_TOP_ANY || bottom != BN_RAND_BOTTOM_ANY)
@@ -40,13 +43,11 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
     }
 
     /* make a random number and set the top and bottom bits */
-    time(&tim);
-    RAND_add(&tim, sizeof(tim), 0.0);
-
-    if (RAND_bytes(buf, bytes) <= 0)
+    b = flag == NORMAL ? RAND_bytes(buf, bytes) : RAND_priv_bytes(buf, bytes);
+    if (b <= 0)
         goto err;
 
-    if (pseudorand == 2) {
+    if (flag == TESTING) {
         /*
          * generate patterns that are more likely to trigger BN library bugs
          */
@@ -86,7 +87,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
  err:
     OPENSSL_clear_free(buf, bytes);
     bn_check_top(rnd);
-    return (ret);
+    return ret;
 
 toosmall:
     BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
@@ -95,29 +96,27 @@ toosmall:
 
 int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
 {
-    return bnrand(0, rnd, bits, top, bottom);
+    return bnrand(NORMAL, rnd, bits, top, bottom);
 }
 
-int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
+int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
 {
-    return bnrand(1, rnd, bits, top, bottom);
+    return bnrand(TESTING, rnd, bits, top, bottom);
 }
 
-int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
+int BN_priv_rand(BIGNUM *rnd, int bits, int top, int bottom)
 {
-    return bnrand(2, rnd, bits, top, bottom);
+    return bnrand(PRIVATE, rnd, bits, top, bottom);
 }
 
 /* random number r:  0 <= r < range */
-static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
+static int bnrand_range(BNRAND_FLAG flag, BIGNUM *r, const BIGNUM *range)
 {
-    int (*bn_rand) (BIGNUM *, int, int, int) =
-        pseudo ? BN_pseudo_rand : BN_rand;
     int n;
     int count = 100;
 
     if (range->neg || BN_is_zero(range)) {
-        BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
+        BNerr(BN_F_BNRAND_RANGE, BN_R_INVALID_RANGE);
         return 0;
     }
 
@@ -133,8 +132,9 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
          * than range
          */
         do {
-            if (!bn_rand(r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+            if (!bnrand(flag, r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
                 return 0;
+
             /*
              * If r < 3*range, use r := r MOD range (which is either r, r -
              * range, or r - 2*range). Otherwise, iterate once more. Since
@@ -150,7 +150,7 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
             }
 
             if (!--count) {
-                BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+                BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
                 return 0;
             }
 
@@ -159,11 +159,11 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
     } else {
         do {
             /* range = 11..._2  or  range = 101..._2 */
-            if (!bn_rand(r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
+            if (!bnrand(flag, r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY))
                 return 0;
 
             if (!--count) {
-                BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
+                BNerr(BN_F_BNRAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
                 return 0;
             }
         }
@@ -176,12 +176,22 @@ static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
 
 int BN_rand_range(BIGNUM *r, const BIGNUM *range)
 {
-    return bn_rand_range(0, r, range);
+    return bnrand_range(NORMAL, r, range);
+}
+
+int BN_priv_rand_range(BIGNUM *r, const BIGNUM *range)
+{
+    return bnrand_range(PRIVATE, r, range);
+}
+
+int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
+{
+    return BN_rand(rnd, bits, top, bottom);
 }
 
 int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
 {
-    return bn_rand_range(1, r, range);
+    return BN_rand_range(r, range);
 }
 
 /*
@@ -229,7 +239,7 @@ int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
     memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
 
     for (done = 0; done < num_k_bytes;) {
-        if (RAND_bytes(random_bytes, sizeof(random_bytes)) != 1)
+        if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1)
             goto err;
         SHA512_Init(&sha);
         SHA512_Update(&sha, &done, sizeof(done));
diff --git a/deps/openssl/openssl/crypto/bn/bn_recp.c b/deps/openssl/openssl/crypto/bn/bn_recp.c
index 20585b9d4b..9ab767f42f 100644
--- a/deps/openssl/openssl/crypto/bn/bn_recp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_recp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -21,22 +21,23 @@ BN_RECP_CTX *BN_RECP_CTX_new(void)
 {
     BN_RECP_CTX *ret;
 
-    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL)
-        return (NULL);
+    if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) {
+        BNerr(BN_F_BN_RECP_CTX_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
 
     bn_init(&(ret->N));
     bn_init(&(ret->Nr));
     ret->flags = BN_FLG_MALLOCED;
-    return (ret);
+    return ret;
 }
 
 void BN_RECP_CTX_free(BN_RECP_CTX *recp)
 {
     if (recp == NULL)
         return;
-
-    BN_free(&(recp->N));
-    BN_free(&(recp->Nr));
+    BN_free(&recp->N);
+    BN_free(&recp->Nr);
     if (recp->flags & BN_FLG_MALLOCED)
         OPENSSL_free(recp);
 }
@@ -48,7 +49,7 @@ int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
     BN_zero(&(recp->Nr));
     recp->num_bits = BN_num_bits(d);
     recp->shift = 0;
-    return (1);
+    return 1;
 }
 
 int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
@@ -77,7 +78,7 @@ int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  err:
     BN_CTX_end(ctx);
     bn_check_top(r);
-    return (ret);
+    return ret;
 }
 
 int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
@@ -87,17 +88,11 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
     BIGNUM *a, *b, *d, *r;
 
     BN_CTX_start(ctx);
+    d = (dv != NULL) ? dv : BN_CTX_get(ctx);
+    r = (rem != NULL) ? rem : BN_CTX_get(ctx);
     a = BN_CTX_get(ctx);
     b = BN_CTX_get(ctx);
-    if (dv != NULL)
-        d = dv;
-    else
-        d = BN_CTX_get(ctx);
-    if (rem != NULL)
-        r = rem;
-    else
-        r = BN_CTX_get(ctx);
-    if (a == NULL || b == NULL || d == NULL || r == NULL)
+    if (b == NULL)
         goto err;
 
     if (BN_ucmp(m, &(recp->N)) < 0) {
@@ -107,7 +102,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
             return 0;
         }
         BN_CTX_end(ctx);
-        return (1);
+        return 1;
     }
 
     /*
@@ -167,7 +162,7 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
     BN_CTX_end(ctx);
     bn_check_top(dv);
     bn_check_top(rem);
-    return (ret);
+    return ret;
 }
 
 /*
@@ -195,5 +190,5 @@ int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
  err:
     bn_check_top(r);
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_shift.c b/deps/openssl/openssl/crypto/bn/bn_shift.c
index 6a1eec80af..15d4b321ba 100644
--- a/deps/openssl/openssl/crypto/bn/bn_shift.c
+++ b/deps/openssl/openssl/crypto/bn/bn_shift.c
@@ -21,11 +21,11 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
     if (r != a) {
         r->neg = a->neg;
         if (bn_wexpand(r, a->top + 1) == NULL)
-            return (0);
+            return 0;
         r->top = a->top;
     } else {
         if (bn_wexpand(r, a->top + 1) == NULL)
-            return (0);
+            return 0;
     }
     ap = a->d;
     rp = r->d;
@@ -40,7 +40,7 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a)
         r->top++;
     }
     bn_check_top(r);
-    return (1);
+    return 1;
 }
 
 int BN_rshift1(BIGNUM *r, const BIGNUM *a)
@@ -53,14 +53,14 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
 
     if (BN_is_zero(a)) {
         BN_zero(r);
-        return (1);
+        return 1;
     }
     i = a->top;
     ap = a->d;
     j = i - (ap[i - 1] == 1);
     if (a != r) {
         if (bn_wexpand(r, j) == NULL)
-            return (0);
+            return 0;
         r->neg = a->neg;
     }
     rp = r->d;
@@ -77,7 +77,7 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a)
     if (!r->top)
         r->neg = 0; /* don't allow negative zero */
     bn_check_top(r);
-    return (1);
+    return 1;
 }
 
 int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
@@ -96,7 +96,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
 
     nw = n / BN_BITS2;
     if (bn_wexpand(r, a->top + nw + 1) == NULL)
-        return (0);
+        return 0;
     r->neg = a->neg;
     lb = n % BN_BITS2;
     rb = BN_BITS2 - lb;
@@ -116,7 +116,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
     r->top = a->top + nw + 1;
     bn_correct_top(r);
     bn_check_top(r);
-    return (1);
+    return 1;
 }
 
 int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
@@ -138,12 +138,12 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
     lb = BN_BITS2 - rb;
     if (nw >= a->top || a->top == 0) {
         BN_zero(r);
-        return (1);
+        return 1;
     }
     i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
     if (r != a) {
         if (bn_wexpand(r, i) == NULL)
-            return (0);
+            return 0;
         r->neg = a->neg;
     } else {
         if (n == 0)
@@ -171,5 +171,5 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
     if (!r->top)
         r->neg = 0; /* don't allow negative zero */
     bn_check_top(r);
-    return (1);
+    return 1;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_sqr.c b/deps/openssl/openssl/crypto/bn/bn_sqr.c
index db72bf28a6..0c0a590f0c 100644
--- a/deps/openssl/openssl/crypto/bn/bn_sqr.c
+++ b/deps/openssl/openssl/crypto/bn/bn_sqr.c
@@ -42,7 +42,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
     BN_CTX_start(ctx);
     rr = (a != r) ? r : BN_CTX_get(ctx);
     tmp = BN_CTX_get(ctx);
-    if (!rr || !tmp)
+    if (rr == NULL || tmp == NULL)
         goto err;
 
     max = 2 * al;               /* Non-zero (from above) */
@@ -102,7 +102,7 @@ int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
     bn_check_top(rr);
     bn_check_top(tmp);
     BN_CTX_end(ctx);
-    return (ret);
+    return ret;
 }
 
 /* tmp must have 2*n words */
diff --git a/deps/openssl/openssl/crypto/bn/bn_sqrt.c b/deps/openssl/openssl/crypto/bn/bn_sqrt.c
index 84376c78e5..b97d8ca43b 100644
--- a/deps/openssl/openssl/crypto/bn/bn_sqrt.c
+++ b/deps/openssl/openssl/crypto/bn/bn_sqrt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -39,7 +39,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
         }
 
         BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
-        return (NULL);
+        return NULL;
     }
 
     if (BN_is_zero(a) || BN_is_one(a)) {
@@ -179,7 +179,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
             if (!BN_set_word(y, i))
                 goto end;
         } else {
-            if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0))
+            if (!BN_priv_rand(y, BN_num_bits(p), 0, 0))
                 goto end;
             if (BN_ucmp(y, p) >= 0) {
                 if (!(p->neg ? BN_add : BN_sub) (y, y, p))
diff --git a/deps/openssl/openssl/crypto/bn/bn_srp.c b/deps/openssl/openssl/crypto/bn/bn_srp.c
index 58b1691eee..27b6ebe518 100644
--- a/deps/openssl/openssl/crypto/bn/bn_srp.c
+++ b/deps/openssl/openssl/crypto/bn/bn_srp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -8,12 +8,12 @@
  */
 
 #include "bn_lcl.h"
-#include "e_os.h"
+#include "internal/nelem.h"
 
 #ifndef OPENSSL_NO_SRP
 
 #include <openssl/srp.h>
-#include <internal/bn_srp.h>
+#include "internal/bn_srp.h"
 
 # if (BN_BYTES == 8)
 #  if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
diff --git a/deps/openssl/openssl/crypto/bn/bn_word.c b/deps/openssl/openssl/crypto/bn/bn_word.c
index 1af13a53fb..262d7668fc 100644
--- a/deps/openssl/openssl/crypto/bn/bn_word.c
+++ b/deps/openssl/openssl/crypto/bn/bn_word.c
@@ -55,7 +55,7 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
                            (BN_ULLONG) w);
 #endif
     }
-    return ((BN_ULONG)ret);
+    return (BN_ULONG)ret;
 }
 
 BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
@@ -92,7 +92,7 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
     if (!a->top)
         a->neg = 0; /* don't allow negative zero */
     bn_check_top(a);
-    return (ret);
+    return ret;
 }
 
 int BN_add_word(BIGNUM *a, BN_ULONG w)
@@ -115,7 +115,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w)
         i = BN_sub_word(a, w);
         if (!BN_is_zero(a))
             a->neg = !(a->neg);
-        return (i);
+        return i;
     }
     for (i = 0; w != 0 && i < a->top; i++) {
         a->d[i] = l = (a->d[i] + w) & BN_MASK2;
@@ -128,7 +128,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w)
         a->d[i] = w;
     }
     bn_check_top(a);
-    return (1);
+    return 1;
 }
 
 int BN_sub_word(BIGNUM *a, BN_ULONG w)
@@ -153,13 +153,13 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w)
         a->neg = 0;
         i = BN_add_word(a, w);
         a->neg = 1;
-        return (i);
+        return i;
     }
 
     if ((a->top == 1) && (a->d[0] < w)) {
         a->d[0] = w - a->d[0];
         a->neg = 1;
-        return (1);
+        return 1;
     }
     i = 0;
     for (;;) {
@@ -175,7 +175,7 @@ int BN_sub_word(BIGNUM *a, BN_ULONG w)
     if ((a->d[i] == 0) && (i == (a->top - 1)))
         a->top--;
     bn_check_top(a);
-    return (1);
+    return 1;
 }
 
 int BN_mul_word(BIGNUM *a, BN_ULONG w)
@@ -191,11 +191,11 @@ int BN_mul_word(BIGNUM *a, BN_ULONG w)
             ll = bn_mul_words(a->d, a->d, a->top, w);
             if (ll) {
                 if (bn_wexpand(a, a->top + 1) == NULL)
-                    return (0);
+                    return 0;
                 a->d[a->top++] = ll;
             }
         }
     }
     bn_check_top(a);
-    return (1);
+    return 1;
 }
diff --git a/deps/openssl/openssl/crypto/bn/bn_x931p.c b/deps/openssl/openssl/crypto/bn/bn_x931p.c
index d01f12cadc..9eb8384fde 100644
--- a/deps/openssl/openssl/crypto/bn/bn_x931p.c
+++ b/deps/openssl/openssl/crypto/bn/bn_x931p.c
@@ -62,10 +62,10 @@ int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
         return 0;
 
     BN_CTX_start(ctx);
-    if (!p1)
+    if (p1 == NULL)
         p1 = BN_CTX_get(ctx);
 
-    if (!p2)
+    if (p2 == NULL)
         p2 = BN_CTX_get(ctx);
 
     t = BN_CTX_get(ctx);
@@ -173,7 +173,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
      * - 1. By setting the top two bits we ensure that the lower bound is
      * exceeded.
      */
-    if (!BN_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+    if (!BN_priv_rand(Xp, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
         goto err;
 
     BN_CTX_start(ctx);
@@ -182,7 +182,7 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
         goto err;
 
     for (i = 0; i < 1000; i++) {
-        if (!BN_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
+        if (!BN_priv_rand(Xq, nbits, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY))
             goto err;
 
         /* Check that |Xp - Xq| > 2^(nbits - 100) */
@@ -227,9 +227,9 @@ int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
     if (Xp1 == NULL || Xp2 == NULL)
         goto error;
 
-    if (!BN_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+    if (!BN_priv_rand(Xp1, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
         goto error;
-    if (!BN_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
+    if (!BN_priv_rand(Xp2, 101, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
         goto error;
     if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
         goto error;
diff --git a/deps/openssl/openssl/crypto/bn/build.info b/deps/openssl/openssl/crypto/bn/build.info
index c608ecce82..a463eddabb 100644
--- a/deps/openssl/openssl/crypto/bn/build.info
+++ b/deps/openssl/openssl/crypto/bn/build.info
@@ -11,16 +11,16 @@ INCLUDE[../../libcrypto]=../../crypto/include
 INCLUDE[bn_exp.o]=..
 
 GENERATE[bn-586.s]=asm/bn-586.pl \
-	$(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
 DEPEND[bn-586.s]=../perlasm/x86asm.pl
 GENERATE[co-586.s]=asm/co-586.pl \
-	$(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
 DEPEND[co-586.s]=../perlasm/x86asm.pl
 GENERATE[x86-mont.s]=asm/x86-mont.pl \
-	$(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
 DEPEND[x86-mont.s]=../perlasm/x86asm.pl
 GENERATE[x86-gf2m.s]=asm/x86-gf2m.pl \
-	$(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+	$(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
 DEPEND[x86-gf2m.s]=../perlasm/x86asm.pl
 
 GENERATE[sparcv9a-mont.S]=asm/sparcv9a-mont.pl $(PERLASM_SCHEME)
@@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=..
 GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME)
 INCLUDE[sparcv9-gf2m.o]=..
 
-GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME)
-GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME)
+GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME)
+INCLUDE[bn-mips.o]=..
+GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME)
+INCLUDE[mips-mont.o]=..
 
 GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME)
 GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME)
@@ -47,7 +49,7 @@ GENERATE[rsaz-x86_64.s]=asm/rsaz-x86_64.pl $(PERLASM_SCHEME)
 GENERATE[rsaz-avx2.s]=asm/rsaz-avx2.pl $(PERLASM_SCHEME)
 
 GENERATE[bn-ia64.s]=asm/ia64.S
-GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(CFLAGS) $(LIB_CFLAGS)
+GENERATE[ia64-mont.s]=asm/ia64-mont.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
 
 GENERATE[parisc-mont.s]=asm/parisc-mont.pl $(PERLASM_SCHEME)
 
@@ -63,22 +65,3 @@ INCLUDE[armv4-mont.o]=..
 GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME)
 INCLUDE[armv4-gf2m.o]=..
 GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME)
-
-OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c
-BEGINRAW[Makefile]
-##### BN assembler implementations
-
-{- $builddir -}/bn-mips3.o:	{- $sourcedir -}/asm/mips3.s
-	@if [ "$(CC)" = "gcc" ]; then \
-		ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \
-		as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \
-	else	$(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi
-
-# GNU assembler fails to compile PA-RISC2 modules, insist on calling
-# vendor assembler...
-{- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s
-	CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2W.o {- $sourcedir -}/asm/pa-risc2W.s
-{- $builddir -}/pa-risc2.o: {- $sourcedir -}/asm/pa-risc2.s
-	CC="$(CC)" $(PERL) $(SRCDIR)/util/fipsas.pl $(SRCDIR) $< /usr/ccs/bin/as -o pa-risc2.o {- $sourcedir -}/asm/pa-risc2.s
-
-ENDRAW[Makefile]
diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.c b/deps/openssl/openssl/crypto/bn/rsaz_exp.c
index 1a70f6cade..22455b8a63 100644
--- a/deps/openssl/openssl/crypto/bn/rsaz_exp.c
+++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.c
@@ -1,54 +1,17 @@
 /*
  * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
  * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
  */
 
-/*****************************************************************************
-*                                                                            *
-*  Copyright (c) 2012, Intel Corporation                                     *
-*                                                                            *
-*  All rights reserved.                                                      *
-*                                                                            *
-*  Redistribution and use in source and binary forms, with or without        *
-*  modification, are permitted provided that the following conditions are    *
-*  met:                                                                      *
-*                                                                            *
-*  *  Redistributions of source code must retain the above copyright         *
-*     notice, this list of conditions and the following disclaimer.          *
-*                                                                            *
-*  *  Redistributions in binary form must reproduce the above copyright      *
-*     notice, this list of conditions and the following disclaimer in the    *
-*     documentation and/or other materials provided with the                 *
-*     distribution.                                                          *
-*                                                                            *
-*  *  Neither the name of the Intel Corporation nor the names of its         *
-*     contributors may be used to endorse or promote products derived from   *
-*     this software without specific prior written permission.               *
-*                                                                            *
-*                                                                            *
-*  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          *
-*  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         *
-*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        *
-*  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            *
-*  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     *
-*  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       *
-*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        *
-*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    *
-*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      *
-*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        *
-*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              *
-*                                                                            *
-******************************************************************************
-* Developers and authors:                                                    *
-* Shay Gueron (1, 2), and Vlad Krasnov (1)                                   *
-* (1) Intel Corporation, Israel Development Center, Haifa, Israel            *
-* (2) University of Haifa, Israel                                            *
-*****************************************************************************/
-
 #include <openssl/opensslconf.h>
 #include "rsaz_exp.h"
 
diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.h b/deps/openssl/openssl/crypto/bn/rsaz_exp.h
index 9501cc8089..c5864f8aaa 100644
--- a/deps/openssl/openssl/crypto/bn/rsaz_exp.h
+++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.h
@@ -1,54 +1,17 @@
 /*
- * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2012, Intel Corporation. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
  * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+ * (1) Intel Corporation, Israel Development Center, Haifa, Israel
+ * (2) University of Haifa, Israel
  */
 
-/*****************************************************************************
-*                                                                            *
-*  Copyright (c) 2012, Intel Corporation                                     *
-*                                                                            *
-*  All rights reserved.                                                      *
-*                                                                            *
-*  Redistribution and use in source and binary forms, with or without        *
-*  modification, are permitted provided that the following conditions are    *
-*  met:                                                                      *
-*                                                                            *
-*  *  Redistributions of source code must retain the above copyright         *
-*     notice, this list of conditions and the following disclaimer.          *
-*                                                                            *
-*  *  Redistributions in binary form must reproduce the above copyright      *
-*     notice, this list of conditions and the following disclaimer in the    *
-*     documentation and/or other materials provided with the                 *
-*     distribution.                                                          *
-*                                                                            *
-*  *  Neither the name of the Intel Corporation nor the names of its         *
-*     contributors may be used to endorse or promote products derived from   *
-*     this software without specific prior written permission.               *
-*                                                                            *
-*                                                                            *
-*  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          *
-*  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         *
-*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        *
-*  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            *
-*  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     *
-*  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       *
-*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        *
-*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    *
-*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      *
-*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        *
-*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              *
-*                                                                            *
-******************************************************************************
-* Developers and authors:                                                    *
-* Shay Gueron (1, 2), and Vlad Krasnov (1)                                   *
-* (1) Intel Corporation, Israel Development Center, Haifa, Israel            *
-* (2) University of Haifa, Israel                                            *
-*****************************************************************************/
-
 #ifndef RSAZ_EXP_H
 # define RSAZ_EXP_H
 
@@ -65,7 +28,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
                             const BN_ULONG exponent[16],
                             const BN_ULONG m_norm[16], const BN_ULONG RR[16],
                             BN_ULONG k0);
-int rsaz_avx2_eligible();
+int rsaz_avx2_eligible(void);
 
 void RSAZ_512_mod_exp(BN_ULONG result[8],
                       const BN_ULONG base_norm[8], const BN_ULONG exponent[8],