diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl')
-rwxr-xr-x | deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl | 40 |
1 files changed, 22 insertions, 18 deletions
diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl index a34f03cc5e..84379fce1c 100755 --- a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -30,20 +30,24 @@ # Further optimization by <appro@openssl.org>: # -# this/original -# Opteron +12-49% -# Bulldozer +14-45% -# P4 +18-46% -# Westmere +12-34% -# Sandy Bridge +9-35% -# Ivy Bridge +9-35% -# Haswell +8-37% -# Broadwell +18-58% -# Atom +15-50% -# VIA Nano +43-160% +# this/original with/without -DECP_NISTZ256_ASM(*) +# Opteron +12-49% +110-150% +# Bulldozer +14-45% +175-210% +# P4 +18-46% n/a :-( +# Westmere +12-34% +80-87% +# Sandy Bridge +9-35% +110-120% +# Ivy Bridge +9-35% +110-125% +# Haswell +8-37% +140-160% +# Broadwell +18-58% +145-210% +# Atom +15-50% +130-180% +# VIA Nano +43-160% +300-480% +# +# (*) "without -DECP_NISTZ256_ASM" refers to build with +# "enable-ec_nistp_64_gcc_128"; # # Ranges denote minimum and maximum improvement coefficients depending -# on benchmark. +# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest +# server-side operation. Keep in mind that +100% means 2x improvement. $flavour = shift; $output = shift; @@ -599,7 +603,7 @@ __ecp_nistz256_mul_montq: adc \$0, $acc0 ######################################################################## - # Second reduction step + # Second reduction step mov $acc1, $t1 shl \$32, $acc1 mulq $poly3 @@ -646,7 +650,7 @@ __ecp_nistz256_mul_montq: adc \$0, $acc1 ######################################################################## - # Third reduction step + # Third reduction step mov $acc2, $t1 shl \$32, $acc2 mulq $poly3 @@ -693,7 +697,7 @@ __ecp_nistz256_mul_montq: adc \$0, $acc2 ######################################################################## - # Final reduction step + # Final reduction step mov $acc3, $t1 shl \$32, $acc3 mulq $poly3 @@ -706,7 +710,7 @@ __ecp_nistz256_mul_montq: mov $acc5, $t1 adc \$0, $acc2 - ######################################################################## + ######################################################################## # Branch-less conditional subtraction of P sub \$-1, $acc4 # .Lpoly[0] mov $acc0, $t2 @@ -2060,7 +2064,7 @@ $code.=<<___; movq %xmm1, $r_ptr call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(res_y, S); ___ -{ +{ ######## ecp_nistz256_div_by_2(res_y, res_y); ########################## # operate in 4-5-6-7 "name space" that matches squaring output # @@ -2149,7 +2153,7 @@ $code.=<<___; lea $M(%rsp), $b_ptr mov $acc4, $acc6 # harmonize sub output and mul input xor %ecx, %ecx - mov $acc4, $S+8*0(%rsp) # have to save:-( + mov $acc4, $S+8*0(%rsp) # have to save:-( mov $acc5, $acc2 mov $acc5, $S+8*1(%rsp) cmovz $acc0, $acc3 |