1 files changed, 22 insertions, 18 deletions
diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
index a34f03cc5e..84379fce1c 100755
--- a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -30,20 +30,24 @@
 
 # Further optimization by <appro@openssl.org>:
 #
-#		this/original
-# Opteron	+12-49%
-# Bulldozer	+14-45%
-# P4		+18-46%
-# Westmere	+12-34%
-# Sandy Bridge	+9-35%
-# Ivy Bridge	+9-35%
-# Haswell	+8-37%
-# Broadwell	+18-58%
-# Atom		+15-50%
-# VIA Nano	+43-160%
+#		this/original	with/without -DECP_NISTZ256_ASM(*)
+# Opteron	+12-49%		+110-150%
+# Bulldozer	+14-45%		+175-210%
+# P4		+18-46%		n/a :-(
+# Westmere	+12-34%		+80-87%
+# Sandy Bridge	+9-35%		+110-120%
+# Ivy Bridge	+9-35%		+110-125%
+# Haswell	+8-37%		+140-160%
+# Broadwell	+18-58%		+145-210%
+# Atom		+15-50%		+130-180%
+# VIA Nano	+43-160%	+300-480%
+#
+# (*)	"without -DECP_NISTZ256_ASM" refers to build with
+#	"enable-ec_nistp_64_gcc_128";
 #
 # Ranges denote minimum and maximum improvement coefficients depending
-# on benchmark.
+# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest
+# server-side operation. Keep in mind that +100% means 2x improvement.
 
 $flavour = shift;
 $output  = shift;
@@ -599,7 +603,7 @@ __ecp_nistz256_mul_montq:
 	adc	\$0, $acc0
 
 	########################################################################
-	# Second reduction step
+	# Second reduction step	
 	mov	$acc1, $t1
 	shl	\$32, $acc1
 	mulq	$poly3
@@ -646,7 +650,7 @@ __ecp_nistz256_mul_montq:
 	adc	\$0, $acc1
 
 	########################################################################
-	# Third reduction step
+	# Third reduction step	
 	mov	$acc2, $t1
 	shl	\$32, $acc2
 	mulq	$poly3
@@ -693,7 +697,7 @@ __ecp_nistz256_mul_montq:
 	adc	\$0, $acc2
 
 	########################################################################
-	# Final reduction step
+	# Final reduction step	
 	mov	$acc3, $t1
 	shl	\$32, $acc3
 	mulq	$poly3
@@ -706,7 +710,7 @@ __ecp_nistz256_mul_montq:
 	 mov	$acc5, $t1
 	adc	\$0, $acc2
 
-	########################################################################
+	########################################################################	
 	# Branch-less conditional subtraction of P
 	sub	\$-1, $acc4		# .Lpoly[0]
 	 mov	$acc0, $t2
@@ -2060,7 +2064,7 @@ $code.=<<___;
 	movq	%xmm1, $r_ptr
 	call	__ecp_nistz256_sqr_mont$x	# p256_sqr_mont(res_y, S);
 ___
-{
+{	
 ######## ecp_nistz256_div_by_2(res_y, res_y); ##########################
 # operate in 4-5-6-7 "name space" that matches squaring output
 #
@@ -2149,7 +2153,7 @@ $code.=<<___;
 	lea	$M(%rsp), $b_ptr
 	mov	$acc4, $acc6			# harmonize sub output and mul input
 	xor	%ecx, %ecx
-	mov	$acc4, $S+8*0(%rsp)		# have to save:-(
+	mov	$acc4, $S+8*0(%rsp)		# have to save:-(	
 	mov	$acc5, $acc2
 	mov	$acc5, $S+8*1(%rsp)
 	cmovz	$acc0, $acc3