1 files changed, 77 insertions, 68 deletions
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
index 0466e11a25..f1292cc75c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -1,68 +1,30 @@
 #! /usr/bin/env perl
 # Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
 # in the file LICENSE in the source distribution or at
 # https://www.openssl.org/source/license.html
-
-
-##############################################################################
-#                                                                            #
-#  Copyright (c) 2012, Intel Corporation                                     #
-#                                                                            #
-#  All rights reserved.                                                      #
-#                                                                            #
-#  Redistribution and use in source and binary forms, with or without        #
-#  modification, are permitted provided that the following conditions are    #
-#  met:                                                                      #
-#                                                                            #
-#  *  Redistributions of source code must retain the above copyright         #
-#     notice, this list of conditions and the following disclaimer.          #
-#                                                                            #
-#  *  Redistributions in binary form must reproduce the above copyright      #
-#     notice, this list of conditions and the following disclaimer in the    #
-#     documentation and/or other materials provided with the                 #
-#     distribution.                                                          #
-#                                                                            #
-#  *  Neither the name of the Intel Corporation nor the names of its         #
-#     contributors may be used to endorse or promote products derived from   #
-#     this software without specific prior written permission.               #
-#                                                                            #
-#                                                                            #
-#  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          #
-#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         #
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        #
-#  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            #
-#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
-#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       #
-#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        #
-#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    #
-#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      #
-#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        #
-#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              #
-#                                                                            #
-##############################################################################
-# Developers and authors:                                                    #
-# Shay Gueron (1, 2), and Vlad Krasnov (1)                                   #
-# (1) Intel Corporation, Israel Development Center, Haifa, Israel            #
-# (2) University of Haifa, Israel                                            #
-##############################################################################
-# Reference:                                                                 #
-# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular             #
-#     Exponentiation,  Using Advanced Vector Instructions Architectures",    #
-#     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,   #
-#     pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012              #
-# [2] S. Gueron: "Efficient Software Implementations of Modular              #
-#     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).  #
-# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE         #
-#     Proceedings of 9th International Conference on Information Technology: #
-#     New Generations (ITNG 2012), pp.821-823 (2012)                         #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    #
-#     resistant 1024-bit modular exponentiation, for optimizing RSA2048      #
-#     on AVX2 capable x86_64 platforms",                                     #
-#     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest#
-##############################################################################
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular
+#     Exponentiation, Using Advanced Vector Instructions Architectures",
+#     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,
+#     pp. 119-135, 2012. Springer-Verlag Berlin Heidelberg 2012
+# [2] S. Gueron: "Efficient Software Implementations of Modular
+#     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).
+# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE
+#     Proceedings of 9th International Conference on Information Technology:
+#     New Generations (ITNG 2012), pp.821-823 (2012)
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+#     resistant 1024-bit modular exponentiation, for optimizing RSA2048
+#     on AVX2 capable x86_64 platforms",
+#     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest
 #
 # +13% improvement over original submission by <appro@openssl.org>
 #
@@ -168,13 +130,21 @@ $code.=<<___;
 .type	rsaz_1024_sqr_avx2,\@function,5
 .align	64
 rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -193,6 +163,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	mov	%rdx, $np			# reassigned argument
 	sub	\$$FrameSize, %rsp
 	mov	$np, $tmp
@@ -382,7 +353,7 @@ $code.=<<___;
 	vpaddq		$TEMP1, $ACC1, $ACC1
 	vpmuludq	32*7-128($aap), $B2, $ACC2
 	 vpbroadcastq	32*5-128($tpa), $B2
-	vpaddq		32*11-448($tp1), $ACC2, $ACC2	
+	vpaddq		32*11-448($tp1), $ACC2, $ACC2
 
 	vmovdqu		$ACC6, 32*6-192($tp0)
 	vmovdqu		$ACC7, 32*7-192($tp0)
@@ -441,7 +412,7 @@ $code.=<<___;
 	vmovdqu		$ACC7, 32*16-448($tp1)
 	lea		8($tp1), $tp1
 
-	dec	$i        
+	dec	$i
 	jnz	.LOOP_SQR_1024
 ___
 $ZERO = $ACC9;
@@ -786,7 +757,7 @@ $code.=<<___;
 	vpblendd	\$3, $TEMP4, $TEMP5, $TEMP4
 	vpaddq		$TEMP3, $ACC7, $ACC7
 	vpaddq		$TEMP4, $ACC8, $ACC8
-     
+
 	vpsrlq		\$29, $ACC4, $TEMP1
 	vpand		$AND_MASK, $ACC4, $ACC4
 	vpsrlq		\$29, $ACC5, $TEMP2
@@ -825,8 +796,10 @@ $code.=<<___;
 
 	vzeroall
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
+.Lsqr_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -840,14 +813,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lsqr_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
 ___
 }
@@ -900,13 +881,21 @@ $code.=<<___;
 .type	rsaz_1024_mul_avx2,\@function,5
 .align	64
 rsaz_1024_mul_avx2:
+.cfi_startproc
 	lea	(%rsp), %rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	vzeroupper
@@ -925,6 +914,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
+.cfi_def_cfa_register	%rbp
 	vzeroall
 	mov	%rdx, $bp	# reassigned argument
 	sub	\$64,%rsp
@@ -1450,15 +1440,17 @@ $code.=<<___;
 	vpaddq		$TEMP4, $ACC8, $ACC8
 
 	vmovdqu		$ACC4, 128-128($rp)
-	vmovdqu		$ACC5, 160-128($rp)    
+	vmovdqu		$ACC5, 160-128($rp)
 	vmovdqu		$ACC6, 192-128($rp)
 	vmovdqu		$ACC7, 224-128($rp)
 	vmovdqu		$ACC8, 256-128($rp)
 	vzeroupper
 
 	mov	%rbp, %rax
+.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
+.Lmul_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -1472,14 +1464,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lmul_1024_epilogue:
 	ret
+.cfi_endproc
 .size	rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
 ___
 }
@@ -1598,8 +1598,10 @@ rsaz_1024_scatter5_avx2:
 .type	rsaz_1024_gather5_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_gather5_avx2:
+.cfi_startproc
 	vzeroupper
 	mov	%rsp,%r11
+.cfi_def_cfa_register	%r11
 ___
 $code.=<<___ if ($win64);
 	lea	-0x88(%rsp),%rax
@@ -1737,11 +1739,13 @@ $code.=<<___ if ($win64);
 	movaps	-0x38(%r11),%xmm13
 	movaps	-0x28(%r11),%xmm14
 	movaps	-0x18(%r11),%xmm15
-.LSEH_end_rsaz_1024_gather5:
 ___
 $code.=<<___;
 	lea	(%r11),%rsp
+.cfi_def_cfa_register	%rsp
 	ret
+.cfi_endproc
+.LSEH_end_rsaz_1024_gather5:
 .size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
 ___
 }
@@ -1814,14 +1818,17 @@ rsaz_se_handler:
 	cmp	%r10,%rbx		# context->Rip<prologue label
 	jb	.Lcommon_seh_tail
 
-	mov	152($context),%rax	# pull context->Rsp
-
 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lcommon_seh_tail
 
-	mov	160($context),%rax	# pull context->Rbp
+	mov	160($context),%rbp	# pull context->Rbp
+
+	mov	8(%r11),%r10d		# HandlerData[2]
+	lea	(%rsi,%r10),%r10	# "in tail" label
+	cmp	%r10,%rbx		# context->Rip>="in tail" label
+	cmovc	%rbp,%rax
 
 	mov	-48(%rax),%r15
 	mov	-40(%rax),%r14
@@ -1899,11 +1906,13 @@ rsaz_se_handler:
 .LSEH_info_rsaz_1024_sqr_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue
+	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail
+	.long	0
 .LSEH_info_rsaz_1024_mul_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lmul_1024_body,.Lmul_1024_epilogue
+	.rva	.Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail
+	.long	0
 .LSEH_info_rsaz_1024_gather5:
 	.byte	0x01,0x36,0x17,0x0b
 	.byte	0x36,0xf8,0x09,0x00	# vmovaps 0x90(rsp),xmm15