Diffstat (limited to 'deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl')
-rwxr-xr-x  deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl | 145
1 file changed, 77 insertions(+), 68 deletions(-)
diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
index 0466e11a25..f1292cc75c 100755
--- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl
@@ -1,68 +1,30 @@
#! /usr/bin/env perl
# Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright (c) 2012, Intel Corporation. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
-
-
-##############################################################################
-# #
-# Copyright (c) 2012, Intel Corporation #
-# #
-# All rights reserved. #
-# #
-# Redistribution and use in source and binary forms, with or without #
-# modification, are permitted provided that the following conditions are #
-# met: #
-# #
-# * Redistributions of source code must retain the above copyright #
-# notice, this list of conditions and the following disclaimer. #
-# #
-# * Redistributions in binary form must reproduce the above copyright #
-# notice, this list of conditions and the following disclaimer in the #
-# documentation and/or other materials provided with the #
-# distribution. #
-# #
-# * Neither the name of the Intel Corporation nor the names of its #
-# contributors may be used to endorse or promote products derived from #
-# this software without specific prior written permission. #
-# #
-# #
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY #
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE #
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR #
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR #
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, #
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR #
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF #
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS #
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
-# #
-##############################################################################
-# Developers and authors: #
-# Shay Gueron (1, 2), and Vlad Krasnov (1) #
-# (1) Intel Corporation, Israel Development Center, Haifa, Israel #
-# (2) University of Haifa, Israel #
-##############################################################################
-# Reference: #
-# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular #
-# Exponentiation, Using Advanced Vector Instructions Architectures", #
-# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, #
-# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 #
-# [2] S. Gueron: "Efficient Software Implementations of Modular #
-# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). #
-# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE #
-# Proceedings of 9th International Conference on Information Technology: #
-# New Generations (ITNG 2012), pp.821-823 (2012) #
-# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis #
-# resistant 1024-bit modular exponentiation, for optimizing RSA2048 #
-# on AVX2 capable x86_64 platforms", #
-# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest#
-##############################################################################
+#
+# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel
+# (2) University of Haifa, Israel
+#
+# References:
+# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular
+# Exponentiation, Using Advanced Vector Instructions Architectures",
+# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,
+#     pp. 119-135, 2012. Springer-Verlag Berlin Heidelberg 2012
+# [2] S. Gueron: "Efficient Software Implementations of Modular
+# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).
+# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring", IEEE
+# Proceedings of 9th International Conference on Information Technology:
+# New Generations (ITNG 2012), pp.821-823 (2012)
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis
+# resistant 1024-bit modular exponentiation, for optimizing RSA2048
+# on AVX2 capable x86_64 platforms",
+# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest
#
# +13% improvement over original submission by <appro@openssl.org>
#
@@ -168,13 +130,21 @@ $code.=<<___;
.type rsaz_1024_sqr_avx2,\@function,5
.align 64
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2
+.cfi_startproc
lea (%rsp), %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
vzeroupper
___
$code.=<<___ if ($win64);
@@ -193,6 +163,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
+.cfi_def_cfa_register %rbp
mov %rdx, $np # reassigned argument
sub \$$FrameSize, %rsp
mov $np, $tmp
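[Note: the hunk above, and its twin in rsaz_1024_mul_avx2 further down, thread DWARF CFI annotations through the prologue: the entry %rsp is snapshotted into %rax and declared the CFA register, so the subsequent pushes and stack adjustments need no further CFA updates. Below is a minimal sketch of that pattern in this file's perlasm idiom; the function name is hypothetical, and .cfi_push is an OpenSSL perlasm extension (expanded by crypto/perlasm/x86_64-xlate.pl into standard .cfi_offset bookkeeping) rather than a native GAS directive.

$code.=<<___;
.globl	cfi_demo
.type	cfi_demo,\@abi-omnipotent
.align	32
cfi_demo:
.cfi_startproc
	lea	(%rsp),%rax		# snapshot entry %rsp ...
.cfi_def_cfa_register	%rax		# ... and compute the CFA from %rax,
					# so later pushes need no CFA updates
	push	%rbx
.cfi_push	%rbx			# record where %rbx was saved

	# ... function body, free to adjust %rsp ...

	mov	-8(%rax),%rbx		# reload %rbx from entry_rsp-8
.cfi_restore	%rbx			# %rbx holds its entry value again
	lea	(%rax),%rsp		# restore %rsp
.cfi_def_cfa_register	%rsp		# CFA tracks %rsp again
	ret
.cfi_endproc
.size	cfi_demo,.-cfi_demo
___

The tail hunks later in this patch mirror the same epilogue: each reload is announced with .cfi_restore before the CFA is handed back to %rsp.]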
@@ -382,7 +353,7 @@ $code.=<<___;
vpaddq $TEMP1, $ACC1, $ACC1
vpmuludq 32*7-128($aap), $B2, $ACC2
vpbroadcastq 32*5-128($tpa), $B2
- vpaddq 32*11-448($tp1), $ACC2, $ACC2
+ vpaddq 32*11-448($tp1), $ACC2, $ACC2
vmovdqu $ACC6, 32*6-192($tp0)
vmovdqu $ACC7, 32*7-192($tp0)
@@ -441,7 +412,7 @@ $code.=<<___;
vmovdqu $ACC7, 32*16-448($tp1)
lea 8($tp1), $tp1
- dec $i
+ dec $i
jnz .LOOP_SQR_1024
___
$ZERO = $ACC9;
@@ -786,7 +757,7 @@ $code.=<<___;
vpblendd \$3, $TEMP4, $TEMP5, $TEMP4
vpaddq $TEMP3, $ACC7, $ACC7
vpaddq $TEMP4, $ACC8, $ACC8
-
+
vpsrlq \$29, $ACC4, $TEMP1
vpand $AND_MASK, $ACC4, $ACC4
vpsrlq \$29, $ACC5, $TEMP2
@@ -825,8 +796,10 @@ $code.=<<___;
vzeroall
mov %rbp, %rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
+.Lsqr_1024_in_tail:
movaps -0xd8(%rax),%xmm6
movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
@@ -840,14 +813,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue:
ret
+.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
___
}
@@ -900,13 +881,21 @@ $code.=<<___;
.type rsaz_1024_mul_avx2,\@function,5
.align 64
rsaz_1024_mul_avx2:
+.cfi_startproc
lea (%rsp), %rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
___
$code.=<<___ if ($win64);
vzeroupper
@@ -925,6 +914,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
+.cfi_def_cfa_register %rbp
vzeroall
mov %rdx, $bp # reassigned argument
sub \$64,%rsp
@@ -1450,15 +1440,17 @@ $code.=<<___;
vpaddq $TEMP4, $ACC8, $ACC8
vmovdqu $ACC4, 128-128($rp)
- vmovdqu $ACC5, 160-128($rp)
+ vmovdqu $ACC5, 160-128($rp)
vmovdqu $ACC6, 192-128($rp)
vmovdqu $ACC7, 224-128($rp)
vmovdqu $ACC8, 256-128($rp)
vzeroupper
mov %rbp, %rax
+.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
+.Lmul_1024_in_tail:
movaps -0xd8(%rax),%xmm6
movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
@@ -1472,14 +1464,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
+.cfi_restore %r15
mov -40(%rax),%r14
+.cfi_restore %r14
mov -32(%rax),%r13
+.cfi_restore %r13
mov -24(%rax),%r12
+.cfi_restore %r12
mov -16(%rax),%rbp
+.cfi_restore %rbp
mov -8(%rax),%rbx
+.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
+.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue:
ret
+.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
___
}
@@ -1598,8 +1598,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,\@abi-omnipotent
.align 32
rsaz_1024_gather5_avx2:
+.cfi_startproc
vzeroupper
mov %rsp,%r11
+.cfi_def_cfa_register %r11
___
$code.=<<___ if ($win64);
lea -0x88(%rsp),%rax
@@ -1737,11 +1739,13 @@ $code.=<<___ if ($win64);
movaps -0x38(%r11),%xmm13
movaps -0x28(%r11),%xmm14
movaps -0x18(%r11),%xmm15
-.LSEH_end_rsaz_1024_gather5:
___
$code.=<<___;
lea (%r11),%rsp
+.cfi_def_cfa_register %rsp
ret
+.cfi_endproc
+.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___
}
@@ -1814,14 +1818,17 @@ rsaz_se_handler:
cmp %r10,%rbx # context->Rip<prologue label
jb .Lcommon_seh_tail
- mov 152($context),%rax # pull context->Rsp
-
mov 4(%r11),%r10d # HandlerData[1]
lea (%rsi,%r10),%r10 # epilogue label
cmp %r10,%rbx # context->Rip>=epilogue label
jae .Lcommon_seh_tail
- mov 160($context),%rax # pull context->Rbp
+ mov 160($context),%rbp # pull context->Rbp
+
+ mov 8(%r11),%r10d # HandlerData[2]
+ lea (%rsi,%r10),%r10 # "in tail" label
+ cmp %r10,%rbx # context->Rip>="in tail" label
+ cmovc %rbp,%rax
mov -48(%rax),%r15
mov -40(%rax),%r14
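[Note: this is the substantive Win64 handler fix. Between the prologue and the new .L*_in_tail labels the frame base lives in the saved %rbp, while from the in-tail label onward (after the "mov %rbp,%rax" in each tail) it lives in %rax. The handler therefore pulls context->Rbp and lets cmovc select it whenever context->Rip has not yet reached the in-tail label; otherwise %rax, loaded from the context earlier in the handler (outside this hunk), is used as-is. A rough Perl model of that selection, with hypothetical names:

# Not part of the patch: model of the frame-base choice the handler makes.
sub frame_base {
    my ($ctx, $in_tail) = @_;	# $ctx: fault-time register snapshot
    # Before the .L*_in_tail label the frame base is still in %rbp;
    # once the tail's "mov %rbp,%rax" has run, it is in %rax.
    return $ctx->{Rip} < $in_tail ? $ctx->{Rbp} : $ctx->{Rax};
}
]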
@@ -1899,11 +1906,13 @@ rsaz_se_handler:
.LSEH_info_rsaz_1024_sqr_avx2:
.byte 9,0,0,0
.rva rsaz_se_handler
- .rva .Lsqr_1024_body,.Lsqr_1024_epilogue
+ .rva .Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail
+ .long 0
.LSEH_info_rsaz_1024_mul_avx2:
.byte 9,0,0,0
.rva rsaz_se_handler
- .rva .Lmul_1024_body,.Lmul_1024_epilogue
+ .rva .Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail
+ .long 0
.LSEH_info_rsaz_1024_gather5:
.byte 0x01,0x36,0x17,0x0b
.byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15