diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl')
-rwxr-xr-x | deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl | 154 |
1 files changed, 117 insertions, 37 deletions
diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl index 35d2b6d146..183137e5f0 100755 --- a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ############################################################################## # # @@ -60,7 +67,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` @@ -1367,20 +1374,44 @@ my ($M1,$T2a,$T2b,$TMP2,$M2,$T2a,$T2b,$TMP2)=map("%xmm$_",(8..15)); $code.=<<___; ################################################################################ -# void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index); -.globl ecp_nistz256_select_w5 -.type ecp_nistz256_select_w5,\@abi-omnipotent +# void ecp_nistz256_scatter_w5(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_scatter_w5 +.type ecp_nistz256_scatter_w5,\@abi-omnipotent +.align 32 +ecp_nistz256_scatter_w5: + lea -3($index,$index,2), $index + movdqa 0x00($in_t), %xmm0 + shl \$5, $index + movdqa 0x10($in_t), %xmm1 + movdqa 0x20($in_t), %xmm2 + movdqa 0x30($in_t), %xmm3 + movdqa 0x40($in_t), %xmm4 + movdqa 0x50($in_t), %xmm5 + movdqa %xmm0, 0x00($val,$index) + movdqa %xmm1, 0x10($val,$index) + movdqa %xmm2, 0x20($val,$index) + movdqa %xmm3, 0x30($val,$index) + movdqa %xmm4, 0x40($val,$index) + movdqa %xmm5, 0x50($val,$index) + + ret +.size ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5 + +################################################################################ +# void ecp_nistz256_gather_w5(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_gather_w5 +.type ecp_nistz256_gather_w5,\@abi-omnipotent .align 32 -ecp_nistz256_select_w5: +ecp_nistz256_gather_w5: ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax test \$`1<<5`, %eax - jnz .Lavx2_select_w5 + jnz .Lavx2_gather_w5 ___ $code.=<<___ if ($win64); lea -0x88(%rsp), %rax -.LSEH_begin_ecp_nistz256_select_w5: +.LSEH_begin_ecp_nistz256_gather_w5: .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) @@ -1457,27 +1488,46 @@ $code.=<<___ if ($win64); movaps 0x80(%rsp), %xmm14 movaps 0x90(%rsp), %xmm15 lea 0xa8(%rsp), %rsp -.LSEH_end_ecp_nistz256_select_w5: +.LSEH_end_ecp_nistz256_gather_w5: ___ $code.=<<___; ret -.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 +.size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5 + +################################################################################ +# void ecp_nistz256_scatter_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_scatter_w7 +.type ecp_nistz256_scatter_w7,\@abi-omnipotent +.align 32 +ecp_nistz256_scatter_w7: + movdqu 0x00($in_t), %xmm0 + shl \$6, $index + movdqu 0x10($in_t), %xmm1 + movdqu 0x20($in_t), %xmm2 + movdqu 0x30($in_t), %xmm3 + movdqa %xmm0, 0x00($val,$index) + movdqa %xmm1, 0x10($val,$index) + movdqa %xmm2, 0x20($val,$index) + movdqa %xmm3, 0x30($val,$index) + + ret +.size ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7 ################################################################################ -# void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index); -.globl ecp_nistz256_select_w7 -.type ecp_nistz256_select_w7,\@abi-omnipotent +# void ecp_nistz256_gather_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_gather_w7 +.type ecp_nistz256_gather_w7,\@abi-omnipotent .align 32 -ecp_nistz256_select_w7: +ecp_nistz256_gather_w7: ___ $code.=<<___ if ($avx>1); mov OPENSSL_ia32cap_P+8(%rip), %eax test \$`1<<5`, %eax - jnz .Lavx2_select_w7 + jnz .Lavx2_gather_w7 ___ $code.=<<___ if ($win64); lea -0x88(%rsp), %rax -.LSEH_begin_ecp_nistz256_select_w7: +.LSEH_begin_ecp_nistz256_gather_w7: .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) @@ -1543,11 +1593,11 @@ $code.=<<___ if ($win64); movaps 0x80(%rsp), %xmm14 movaps 0x90(%rsp), %xmm15 lea 0xa8(%rsp), %rsp -.LSEH_end_ecp_nistz256_select_w7: +.LSEH_end_ecp_nistz256_gather_w7: ___ $code.=<<___; ret -.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 +.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7 ___ } if ($avx>1) { @@ -1558,16 +1608,16 @@ my ($M1,$T1a,$T1b,$T1c,$TMP1)=map("%ymm$_",(10..14)); $code.=<<___; ################################################################################ -# void ecp_nistz256_avx2_select_w5(uint64_t *val, uint64_t *in_t, int index); -.type ecp_nistz256_avx2_select_w5,\@abi-omnipotent +# void ecp_nistz256_avx2_gather_w5(uint64_t *val, uint64_t *in_t, int index); +.type ecp_nistz256_avx2_gather_w5,\@abi-omnipotent .align 32 -ecp_nistz256_avx2_select_w5: -.Lavx2_select_w5: +ecp_nistz256_avx2_gather_w5: +.Lavx2_gather_w5: vzeroupper ___ $code.=<<___ if ($win64); lea -0x88(%rsp), %rax -.LSEH_begin_ecp_nistz256_avx2_select_w5: +.LSEH_begin_ecp_nistz256_avx2_gather_w5: .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax) .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax) @@ -1645,11 +1695,11 @@ $code.=<<___ if ($win64); movaps 0x80(%rsp), %xmm14 movaps 0x90(%rsp), %xmm15 lea 0xa8(%rsp), %rsp -.LSEH_end_ecp_nistz256_avx2_select_w5: +.LSEH_end_ecp_nistz256_avx2_gather_w5: ___ $code.=<<___; ret -.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 +.size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5 ___ } if ($avx>1) { @@ -1662,17 +1712,17 @@ my ($M2,$T2a,$T2b,$TMP2)=map("%ymm$_",(12..15)); $code.=<<___; ################################################################################ -# void ecp_nistz256_avx2_select_w7(uint64_t *val, uint64_t *in_t, int index); -.globl ecp_nistz256_avx2_select_w7 -.type ecp_nistz256_avx2_select_w7,\@abi-omnipotent +# void ecp_nistz256_avx2_gather_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_avx2_gather_w7 +.type ecp_nistz256_avx2_gather_w7,\@abi-omnipotent .align 32 -ecp_nistz256_avx2_select_w7: -.Lavx2_select_w7: +ecp_nistz256_avx2_gather_w7: +.Lavx2_gather_w7: vzeroupper ___ $code.=<<___ if ($win64); lea -0x88(%rsp), %rax -.LSEH_begin_ecp_nistz256_avx2_select_w7: +.LSEH_begin_ecp_nistz256_avx2_gather_w7: .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax) .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax) @@ -1765,21 +1815,21 @@ $code.=<<___ if ($win64); movaps 0x80(%rsp), %xmm14 movaps 0x90(%rsp), %xmm15 lea 0xa8(%rsp), %rsp -.LSEH_end_ecp_nistz256_avx2_select_w7: +.LSEH_end_ecp_nistz256_avx2_gather_w7: ___ $code.=<<___; ret -.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7 ___ } else { $code.=<<___; -.globl ecp_nistz256_avx2_select_w7 -.type ecp_nistz256_avx2_select_w7,\@function,3 +.globl ecp_nistz256_avx2_gather_w7 +.type ecp_nistz256_avx2_gather_w7,\@function,3 .align 32 -ecp_nistz256_avx2_select_w7: +ecp_nistz256_avx2_gather_w7: .byte 0x0f,0x0b # ud2 ret -.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7 ___ } {{{ @@ -2998,6 +3048,36 @@ ___ } }}} +######################################################################## +# Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7 +# +open TABLE,"<ecp_nistz256_table.c" or +open TABLE,"<${dir}../ecp_nistz256_table.c" or +die "failed to open ecp_nistz256_table.c:",$!; + +use integer; + +foreach(<TABLE>) { + s/TOBN\(\s*(0x[0-9a-f]+),\s*(0x[0-9a-f]+)\s*\)/push @arr,hex($2),hex($1)/geo; +} +close TABLE; + +die "insane number of elements" if ($#arr != 64*16*37-1); + +print <<___; +.text +.globl ecp_nistz256_precomputed +.type ecp_nistz256_precomputed,\@object +.align 4096 +ecp_nistz256_precomputed: +___ +while (@line=splice(@arr,0,16)) { + print ".long\t",join(',',map { sprintf "0x%08x",$_} @line),"\n"; +} +print <<___; +.size ecp_nistz256_precomputed,.-ecp_nistz256_precomputed +___ + $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; close STDOUT; |