aboutsummaryrefslogtreecommitdiff
path: root/deps/openssl/openssl/crypto/sha/asm
diff options
context:
space:
mode:
authorShigeki Ohtsu <ohtsu@ohtsu.org>2018-03-29 16:39:12 +0900
committerShigeki Ohtsu <ohtsu@ohtsu.org>2018-04-10 06:45:42 +0900
commit66cb29e64621fdd1aa5e377a395ff107d21a613b (patch)
treef05243a51577e04b6f1c4a2f8a6b7b2f05786079 /deps/openssl/openssl/crypto/sha/asm
parent38c97f5dc7ff3fbf83982d0268fc9e93cfc00c7d (diff)
downloadandroid-node-v8-66cb29e64621fdd1aa5e377a395ff107d21a613b.tar.gz
android-node-v8-66cb29e64621fdd1aa5e377a395ff107d21a613b.tar.bz2
android-node-v8-66cb29e64621fdd1aa5e377a395ff107d21a613b.zip
deps: upgrade openssl sources to 1.1.0h
This updates all sources in deps/openssl/openssl with openssl-1.1.0h. Fixes: https://github.com/nodejs/node/issues/4270 PR-URL: https://github.com/nodejs/node/pull/19794 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rod Vagg <rod@vagg.org> Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Diffstat (limited to 'deps/openssl/openssl/crypto/sha/asm')
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/README1
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-586.pl22
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-alpha.pl11
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl79
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl29
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-c64xplus.pl337
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-ia64.pl13
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl20
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl15
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-parisc.pl9
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl11
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-s390x.pl18
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl13
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9a.pl11
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha1-thumb.pl9
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl20
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha256-586.pl14
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl59
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha256-c64xplus.pl320
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl20
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-586.pl15
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl107
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl38
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-c64xplus.pl438
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl11
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl17
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha512-parisc.pl9
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl9
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-s390x.pl17
-rw-r--r--deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl13
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl17
-rwxr-xr-xdeps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl9
32 files changed, 1596 insertions, 135 deletions
diff --git a/deps/openssl/openssl/crypto/sha/asm/README b/deps/openssl/openssl/crypto/sha/asm/README
deleted file mode 100644
index b7e755765f..0000000000
--- a/deps/openssl/openssl/crypto/sha/asm/README
+++ /dev/null
@@ -1 +0,0 @@
-C2.pl works
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl
index e0b5d83b62..5adca23404 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# [Re]written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -113,6 +120,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output=pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],"sha1-586.pl",$ARGV[$#ARGV] eq "386");
$xmm=$ymm=0;
@@ -647,7 +657,7 @@ my $_ror=sub { &ror(@_) };
&jmp (&label("loop"));
######################################################################
-# SSE instruction sequence is first broken to groups of indepentent
+# SSE instruction sequence is first broken to groups of independent
# instructions, independent in respect to their inputs and shifter
# (not all architectures have more than one). Then IALU instructions
# are "knitted in" between the SSE groups. Distance is maintained for
@@ -656,14 +666,14 @@ my $_ror=sub { &ror(@_) };
#
# Temporary registers usage. X[2] is volatile at the entry and at the
# end is restored from backtrace ring buffer. X[3] is expected to
-# contain current K_XX_XX constant and is used to caclulate X[-1]+K
+# contain current K_XX_XX constant and is used to calculate X[-1]+K
# from previous round, it becomes volatile the moment the value is
# saved to stack for transfer to IALU. X[4] becomes volatile whenever
# X[-4] is accumulated and offloaded to backtrace ring buffer, at the
# end it is loaded with next K_XX_XX [which becomes X[3] in next
# round]...
#
-sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -1186,7 +1196,7 @@ my $_ror=sub { &shrd(@_[0],@_) };
&and (@T[0],@T[1]);
&jmp (&label("loop"));
-sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -1474,3 +1484,5 @@ sub Xtail_avx()
&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-alpha.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-alpha.pl
index 6c4b9251fd..4124958f78 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-alpha.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-alpha.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -317,6 +324,6 @@ $code.=<<___;
.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
-$output=shift and open STDOUT,">$output";
+$output=pop and open STDOUT,">$output";
print $code;
close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl
index b2c30322c3..7ff5bfbba6 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -60,14 +67,28 @@
# is ~2.5x larger and there are some redundant instructions executed
# when processing last block, improvement is not as big for smallest
# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per
-# byte, which is also >80% faster than integer-only code.
+# byte, which is also >80% faster than integer-only code. Cortex-A15
+# is even faster spending 5.6 cycles per byte outperforming integer-
+# only code by factor of 2.
# May 2014.
#
# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$ctx="r0";
$inp="r1";
@@ -167,7 +188,12 @@ $code=<<___;
#include "arm_arch.h"
.text
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
.code 32
+#endif
.global sha1_block_data_order
.type sha1_block_data_order,%function
@@ -175,9 +201,13 @@ $code=<<___;
.align 5
sha1_block_data_order:
#if __ARM_MAX_ARCH__>=7
- sub r3,pc,#8 @ sha1_block_data_order
+.Lsha1_block:
+ adr r3,.Lsha1_block
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
tst r12,#ARMV8_SHA1
bne .LARMv8
tst r12,#ARMV7_NEON
@@ -199,7 +229,12 @@ for($i=0;$i<5;$i++) {
&BODY_00_15(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
bne .L_00_15 @ [((11+4)*5+2)*3]
sub sp,sp,#25*4
___
@@ -218,7 +253,12 @@ for($i=0;$i<5;$i++) {
&BODY_20_39(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp @ preserve carry
+#endif
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
@@ -230,7 +270,12 @@ for($i=0;$i<5;$i++) {
&BODY_40_59(@V); unshift(@V,pop(@V));
}
$code.=<<___;
+#if defined(__thumb2__)
+ mov $t3,sp
+ teq $Xi,$t3
+#else
teq $Xi,sp
+#endif
bne .L_40_59 @ [+((12+5)*5+2)*4]
ldr $K,.LK_60_79
@@ -266,7 +311,7 @@ $code.=<<___;
.LK_60_79: .word 0xca62c1d6
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha1_block_data_order
+.word OPENSSL_armcap_P-.Lsha1_block
#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
@@ -441,6 +486,7 @@ sub Xuplast_80 ()
&teq ($inp,$len);
&sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
+ &it ("eq");
&subeq ($inp,$inp,64); # reload last block to avoid SEGV
&vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
eval(shift(@insns));
@@ -491,12 +537,12 @@ sha1_block_data_order_neon:
@ dmb @ errata #451034 on early Cortex A8
@ vstmdb sp!,{d8-d15} @ ABI specification says so
mov $saved_sp,sp
- sub sp,sp,#64 @ alloca
+ sub $Xfer,sp,#64
adr $K_XX_XX,.LK_00_19
- bic sp,sp,#15 @ align for 128-bit stores
+ bic $Xfer,$Xfer,#15 @ align for 128-bit stores
ldmia $ctx,{$a,$b,$c,$d,$e} @ load context
- mov $Xfer,sp
+ mov sp,$Xfer @ alloca
vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned
veor $zero,$zero,$zero
@@ -543,10 +589,13 @@ $code.=<<___;
add $b,$b,$t0
add $c,$c,$t1
add $d,$d,$Xfer
+ it eq
moveq sp,$saved_sp
add $e,$e,$Ki
+ it ne
ldrne $Ki,[sp]
stmia $ctx,{$a,$b,$c,$d,$e}
+ itt ne
addne $Xfer,sp,#3*16
bne .Loop_neon
@@ -567,6 +616,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__)
+# define INST(a,b,c,d) .byte c,d|0xf,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d|0x10
+# endif
+
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
@@ -660,7 +716,10 @@ ___
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
- sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+
+ # this fix-up provides Thumb encoding in conjunction with INST
+ $word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl
index c04432a543..84a00bf2af 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -18,13 +25,22 @@
# Cortex-A57 2.35 7.88 (+74%)
# Denver 2.13 3.97 (+0%)(**)
# X-Gene 8.80 (+200%)
+# Mongoose 2.05 6.50 (+160%)
#
# (*) Software results are presented mostly for reference purposes.
# (**) Keep in mind that Denver relies on binary translation, which
# optimizes compiler output at run-time.
$flavour = shift;
-open STDOUT,">".shift;
+$output = shift;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
($ctx,$inp,$num)=("x0","x1","x2");
@Xw=map("w$_",(3..17,19));
@@ -158,11 +174,16 @@ $code.=<<___;
.text
+.extern OPENSSL_armcap_P
.globl sha1_block_data_order
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
+#ifdef __ILP32__
+ ldrsw x16,.LOPENSSL_armcap_P
+#else
ldr x16,.LOPENSSL_armcap_P
+#endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
@@ -300,7 +321,11 @@ $code.=<<___;
.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79
.LOPENSSL_armcap_P:
+#ifdef __ILP32__
+.long OPENSSL_armcap_P-.
+#else
.quad OPENSSL_armcap_P-.
+#endif
.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
.comm OPENSSL_armcap_P,4,4
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-c64xplus.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-c64xplus.pl
new file mode 100644
index 0000000000..4db2bcb06b
--- /dev/null
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-c64xplus.pl
@@ -0,0 +1,337 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA1 for C64x+.
+#
+# November 2011
+#
+# If compared to compiler-generated code with similar characteristics,
+# i.e. compiled with OPENSSL_SMALL_FOOTPRINT and utilizing SPLOOPs,
+# this implementation is 25% smaller and >2x faster. In absolute terms
+# performance is (quite impressive) ~6.5 cycles per processed byte.
+# Fully unrolled assembler would be ~5x larger and is likely to be
+# ~15% faster. It would be free from references to intermediate ring
+# buffer, but put more pressure on L1P [both because the code would be
+# larger and won't be using SPLOOP buffer]. There are no plans to
+# realize fully unrolled variant though...
+#
+# !!! Note that this module uses AMR, which means that all interrupt
+# service routines are expected to preserve it and for own well-being
+# zero it upon entry.
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments
+
+($A,$B,$C,$D,$E, $Arot,$F,$F0,$T,$K) = map("A$_",(16..20, 21..25));
+($X0,$X2,$X8,$X13) = ("A26","B26","A27","B27");
+($TX0,$TX1,$TX2,$TX3) = map("B$_",(28..31));
+($XPA,$XPB) = ("A5","B5"); # X circular buffer
+($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM
+
+$code=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .asg sha1_block_data_order,_sha1_block_data_order
+ .endif
+
+ .asg B3,RA
+ .asg A15,FP
+ .asg B15,SP
+
+ .if .BIG_ENDIAN
+ .asg MV,SWAP2
+ .asg MV,SWAP4
+ .endif
+
+ .global _sha1_block_data_order
+_sha1_block_data_order:
+ .asmfunc stack_usage(64)
+ MV $NUM,A0 ; reassign $NUM
+|| MVK -64,B0
+ [!A0] BNOP RA ; if ($NUM==0) return;
+|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64)
+|| [A0] MV SP,FP
+ [A0] LDW *${CTX}[0],$A ; load A-E...
+|| [A0] AND B0,SP,SP ; align stack at 64 bytes
+ [A0] LDW *${CTX}[1],$B
+|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
+ [A0] LDW *${CTX}[2],$C
+|| [A0] MVK 0x00404,B0
+ [A0] LDW *${CTX}[3],$D
+|| [A0] MVKH 0x50000,B0 ; 0x050404, 64 bytes for $XP[AB]
+ [A0] LDW *${CTX}[4],$E
+|| [A0] MVC B0,AMR ; setup circular addressing
+ LDNW *${INP}++,$TX1 ; pre-fetch input
+ NOP 1
+
+loop?:
+ MVK 0x00007999,$K
+|| ADDAW SP,2,$XPA
+|| SUB A0,1,A0
+|| MVK 13,B0
+ MVKH 0x5a820000,$K ; K_00_19
+|| ADDAW SP,2,$XPB
+|| MV $A,$Actx
+|| MV $B,$Bctx
+;;==================================================
+ SPLOOPD 5 ; BODY_00_13
+|| MV $C,$Cctx
+|| MV $D,$Dctx
+|| MV $E,$Ectx
+|| MVC B0,ILC
+
+ ROTL $A,5,$Arot
+|| AND $C,$B,$F
+|| ANDN $D,$B,$F0
+|| ADD $K,$E,$T ; T=E+K
+
+ XOR $F0,$F,$F ; F_00_19(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+|| SWAP2 $TX1,$TX2
+|| LDNW *${INP}++,$TX1
+
+ ADD $F,$T,$T ; T+=F_00_19(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| SWAP4 $TX2,$TX3 ; byte swap
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+
+ ADD $TX3,$T,$A ; A=T+Xi
+|| STW $TX3,*${XPB}++
+ SPKERNEL
+;;==================================================
+ ROTL $A,5,$Arot ; BODY_14
+|| AND $C,$B,$F
+|| ANDN $D,$B,$F0
+|| ADD $K,$E,$T ; T=E+K
+
+ XOR $F0,$F,$F ; F_00_19(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+|| SWAP2 $TX1,$TX2
+|| LDNW *${INP}++,$TX1
+
+ ADD $F,$T,$T ; T+=F_00_19(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| SWAP4 $TX2,$TX2 ; byte swap
+|| LDW *${XPA}++,$X0 ; fetches from X ring buffer are
+|| LDW *${XPB}[4],$X2 ; 2 iterations ahead
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+|| LDW *${XPA}[7],$X8
+|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
+|| MV $TX2,$TX3
+
+ ADD $TX2,$T,$A ; A=T+Xi
+|| STW $TX2,*${XPB}++
+;;==================================================
+ ROTL $A,5,$Arot ; BODY_15
+|| AND $C,$B,$F
+|| ANDN $D,$B,$F0
+|| ADD $K,$E,$T ; T=E+K
+
+ XOR $F0,$F,$F ; F_00_19(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+|| SWAP2 $TX1,$TX2
+
+ ADD $F,$T,$T ; T+=F_00_19(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| SWAP4 $TX2,$TX2 ; byte swap
+|| XOR $X0,$X2,$TX0 ; Xupdate XORs are 1 iteration ahead
+|| LDW *${XPA}++,$X0
+|| LDW *${XPB}[4],$X2
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+|| XOR $X8,$X13,$TX1
+|| LDW *${XPA}[7],$X8
+|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
+|| MV $TX2,$TX3
+
+ ADD $TX2,$T,$A ; A=T+Xi
+|| STW $TX2,*${XPB}++
+|| XOR $TX0,$TX1,$TX1
+|| MVK 3,B0
+;;==================================================
+ SPLOOPD 5 ; BODY_16_19
+|| MVC B0,ILC
+
+ ROTL $A,5,$Arot
+|| AND $C,$B,$F
+|| ANDN $D,$B,$F0
+|| ADD $K,$E,$T ; T=E+K
+|| ROTL $TX1,1,$TX2 ; Xupdate output
+
+ XOR $F0,$F,$F ; F_00_19(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+
+ ADD $F,$T,$T ; T+=F_00_19(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| XOR $X0,$X2,$TX0
+|| LDW *${XPA}++,$X0
+|| LDW *${XPB}[4],$X2
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+|| XOR $X8,$X13,$TX1
+|| LDW *${XPA}[7],$X8
+|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
+|| MV $TX2,$TX3
+
+ ADD $TX2,$T,$A ; A=T+Xi
+|| STW $TX2,*${XPB}++
+|| XOR $TX0,$TX1,$TX1
+ SPKERNEL
+
+ MVK 0xffffeba1,$K
+|| MVK 19,B0
+ MVKH 0x6ed90000,$K ; K_20_39
+___
+sub BODY_20_39 {
+$code.=<<___;
+;;==================================================
+ SPLOOPD 5 ; BODY_20_39
+|| MVC B0,ILC
+
+ ROTL $A,5,$Arot
+|| XOR $B,$C,$F
+|| ADD $K,$E,$T ; T=E+K
+|| ROTL $TX1,1,$TX2 ; Xupdate output
+
+ XOR $D,$F,$F ; F_20_39(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+
+ ADD $F,$T,$T ; T+=F_20_39(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| XOR $X0,$X2,$TX0
+|| LDW *${XPA}++,$X0
+|| LDW *${XPB}[4],$X2
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+|| XOR $X8,$X13,$TX1
+|| LDW *${XPA}[7],$X8
+|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
+|| MV $TX2,$TX3
+
+ ADD $TX2,$T,$A ; A=T+Xi
+|| STW $TX2,*${XPB}++ ; last one is redundant
+|| XOR $TX0,$TX1,$TX1
+ SPKERNEL
+___
+$code.=<<___ if (!shift);
+ MVK 0xffffbcdc,$K
+ MVKH 0x8f1b0000,$K ; K_40_59
+___
+} &BODY_20_39();
+$code.=<<___;
+;;==================================================
+ SPLOOPD 5 ; BODY_40_59
+|| MVC B0,ILC
+|| AND $B,$C,$F
+|| AND $B,$D,$F0
+
+ ROTL $A,5,$Arot
+|| XOR $F0,$F,$F
+|| AND $C,$D,$F0
+|| ADD $K,$E,$T ; T=E+K
+|| ROTL $TX1,1,$TX2 ; Xupdate output
+
+ XOR $F0,$F,$F ; F_40_59(B,C,D)
+|| MV $D,$E ; E=D
+|| MV $C,$D ; D=C
+
+ ADD $F,$T,$T ; T+=F_40_59(B,C,D)
+|| ROTL $B,30,$C ; C=ROL(B,30)
+|| XOR $X0,$X2,$TX0
+|| LDW *${XPA}++,$X0
+|| LDW *${XPB}[4],$X2
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| MV $A,$B ; B=A
+|| XOR $X8,$X13,$TX1
+|| LDW *${XPA}[7],$X8
+|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13
+|| MV $TX2,$TX3
+
+ ADD $TX2,$T,$A ; A=T+Xi
+|| STW $TX2,*${XPB}++
+|| XOR $TX0,$TX1,$TX1
+|| AND $B,$C,$F
+|| AND $B,$D,$F0
+ SPKERNEL
+
+ MVK 0xffffc1d6,$K
+|| MVK 18,B0
+ MVKH 0xca620000,$K ; K_60_79
+___
+ &BODY_20_39(-1); # BODY_60_78
+$code.=<<___;
+;;==================================================
+ [A0] B loop?
+|| ROTL $A,5,$Arot ; BODY_79
+|| XOR $B,$C,$F
+|| ROTL $TX1,1,$TX2 ; Xupdate output
+
+ [A0] LDNW *${INP}++,$TX1 ; pre-fetch input
+|| ADD $K,$E,$T ; T=E+K
+|| XOR $D,$F,$F ; F_20_39(B,C,D)
+
+ ADD $F,$T,$T ; T+=F_20_39(B,C,D)
+|| ADD $Ectx,$D,$E ; E=D,E+=Ectx
+|| ADD $Dctx,$C,$D ; D=C,D+=Dctx
+|| ROTL $B,30,$C ; C=ROL(B,30)
+
+ ADD $Arot,$T,$T ; T+=ROL(A,5)
+|| ADD $Bctx,$A,$B ; B=A,B+=Bctx
+
+ ADD $TX2,$T,$A ; A=T+Xi
+
+ ADD $Actx,$A,$A ; A+=Actx
+|| ADD $Cctx,$C,$C ; C+=Cctx
+;; end of loop?
+
+ BNOP RA ; return
+|| MV FP,SP ; restore stack pointer
+|| LDW *FP[0],FP ; restore frame pointer
+ STW $A,*${CTX}[0] ; emit A-E...
+|| MVK 0,B0
+ STW $B,*${CTX}[1]
+|| MVC B0,AMR ; clear AMR
+ STW $C,*${CTX}[2]
+ STW $D,*${CTX}[3]
+ STW $E,*${CTX}[4]
+ .endasmfunc
+
+ .sect .const
+ .cstring "SHA1 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+___
+
+print $code;
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-ia64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-ia64.pl
index 02d35d1614..dec21f92d5 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-ia64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-ia64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -14,6 +21,8 @@
# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
# is >50% better than HP C and >2x better than gcc.
+$output = pop;
+
$code=<<___;
.ident \"sha1-ia64.s, version 1.3\"
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
@@ -301,5 +310,5 @@ $code.=<<___;
stringz "SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
___
-$output=shift and open STDOUT,">$output";
+open STDOUT,">$output" if $output;
print $code;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl
index a8d8708d4b..51c73c05ac 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -19,6 +26,7 @@
# Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80%
# Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68%
# Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160%
+# Skylake (8.70 +5.00=13.7)/n 3.64 4.20 +145%
# Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64%
#
# (i) multi-block CBC encrypt with 128-bit key;
@@ -62,7 +70,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
# void sha1_multi_block (
@@ -478,7 +486,7 @@ $code.=<<___;
jnz .Loop_grande
.Ldone:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
@@ -538,7 +546,7 @@ $code.=<<___;
movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap
.Loop_grande_shaext:
- mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num
+ mov $num,`$REG_SZ*17+8`(%rsp) # original $num
xor $num,$num
___
for($i=0;$i<2;$i++) {
@@ -1116,7 +1124,7 @@ $code.=<<___;
jnz .Loop_grande_avx
.Ldone_avx:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
vzeroupper
___
$code.=<<___ if ($win64);
@@ -1271,7 +1279,7 @@ $code.=<<___;
#jnz .Loop_grande_avx2
.Ldone_avx2:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
vzeroupper
___
$code.=<<___ if ($win64);
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl
index 3408493899..882f9731cf 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -70,7 +77,7 @@ if ($flavour =~ /64|n32/i) {
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
-for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
+for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
open STDOUT,">$output";
if (!defined($big_endian))
@@ -325,8 +332,8 @@ $code.=<<___ if ($i<79);
___
}
-$FRAMESIZE=16; # large enough to accomodate NUBI saved registers
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
+$FRAMESIZE=16; # large enough to accommodate NUBI saved registers
+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code=<<___;
#ifdef OPENSSL_FIPSCANISTER
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-parisc.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-parisc.pl
index 6e5a328a6f..a85d126ff0 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-parisc.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-parisc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl
index ab655021cc..add5a9ea5c 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -37,7 +44,7 @@ if ($flavour =~ /64/) {
$PUSH ="stw";
} else { die "nonsense $flavour"; }
-# Define endianess based on flavour
+# Define endianness based on flavour
# i.e.: linux64le
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-s390x.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-s390x.pl
index d5cf1640a1..79df1ffdad 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-s390x.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-s390x.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -28,7 +35,8 @@
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
-# remains z/Architecture specific.
+# remains z/Architecture specific. On z990 it was measured to perform
+# 23% better than code generated by gcc 4.3.
$kimdfunc=1; # magic function code for kimd instruction
@@ -42,7 +50,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$K_00_39="%r0"; $K=$K_00_39;
@@ -164,9 +172,6 @@ sha1_block_data_order:
___
$code.=<<___ if ($kimdfunc);
larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lsoftware
lg %r0,16(%r1) # check kimd capabilities
tmhh %r0,`0x8000>>$kimdfunc`
jz .Lsoftware
@@ -234,7 +239,6 @@ $code.=<<___;
br %r14
.size sha1_block_data_order,.-sha1_block_data_order
.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,80,8
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl
index b5efcde5c1..7437ff4f05 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -25,7 +32,7 @@
# single-process result on 8-core processor, or ~9GBps per 2.85GHz
# socket.
-$output=shift;
+$output=pop;
open STDOUT,">$output";
@X=("%o0","%o1","%o2","%o3","%o4","%o5","%g1","%o7");
@@ -368,7 +375,7 @@ ___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis {
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9a.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9a.pl
index e65291bbd9..f9ed5630e8 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9a.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9a.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -544,7 +551,7 @@ ___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis {
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-thumb.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-thumb.pl
index 7c9ea9b029..661fd9f9ff 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-thumb.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-thumb.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl
index 752138b0ea..6a3378ba4c 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -73,13 +80,16 @@
# Sandy Bridge 7.70 6.10/+26% 4.99/+54%
# Ivy Bridge 6.06 4.67/+30% 4.60/+32%
# Haswell 5.45 4.15/+31% 3.57/+53%
+# Skylake 5.18 4.06/+28% 3.54/+46%
# Bulldozer 9.11 5.95/+53%
# VIA Nano 9.32 7.15/+30%
# Atom 10.3 9.17/+12%
# Silvermont 13.1(*) 9.37/+40%
+# Goldmont 8.13 6.42/+27% 1.70/+380%(**)
#
# (*) obviously suboptimal result, nothing was done about it,
# because SSSE3 code is compiled unconditionally;
+# (**) SHAEXT result
$flavour = shift;
$output = shift;
@@ -114,7 +124,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$shaext=1; ### set to zero if compiling for 1.0.1
$avx=1 if (!$shaext && $avx);
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
$ctx="%rdi"; # 1st arg
@@ -541,7 +551,7 @@ sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
$code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
}
-sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -994,7 +1004,7 @@ $code.=<<___;
jmp .Loop_avx
___
-sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
@@ -1466,7 +1476,7 @@ sub bodyx_40_59 () { # 10 instructions, 3 cycles critical path
)
}
-sub Xupdate_avx2_16_31() # recall that $Xi starts wtih 4
+sub Xupdate_avx2_16_31() # recall that $Xi starts with 4
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body,&$body); # 35 instructions
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl
index e907714381..6af1d84beb 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -63,6 +70,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output=pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
$xmm=$avx=0;
@@ -1279,3 +1289,5 @@ sub bodyx_00_15 () { # +10%
&function_end_B("sha256_block_data_order");
&asm_finish();
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl
index 750216eb42..55d30cba3a 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -37,8 +44,20 @@
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$ctx="r0"; $t0="r0";
$inp="r1"; $t4="r1";
@@ -73,7 +92,9 @@ $code.=<<___ if ($i<16);
eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+# ifndef __ARMEB__
rev $t1,$t1
+# endif
#else
@ ldrb $t1,[$inp,#3] @ $i
add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
@@ -161,15 +182,11 @@ $code=<<___;
#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__)
.syntax unified
-# ifdef __thumb2__
.thumb
-# else
+#else
.code 32
-# endif
#endif
.type K256,%object
@@ -195,21 +212,25 @@ K256:
.word 0 @ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
+.word OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align 5
.global sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
-#if __ARM_ARCH__<7
+.Lsha256_block_data_order:
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha256_block_data_order
#else
- adr r3,.
+ adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
tst r12,#ARMV8_SHA256
bne .LARMv8
tst r12,#ARMV7_NEON
@@ -454,7 +475,8 @@ $code.=<<___;
.global sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
-.align 4
+.align 5
+.skip 16
sha256_block_data_order_neon:
.LNEON:
stmdb sp!,{r4-r12,lr}
@@ -580,7 +602,7 @@ my $Ktbl="r3";
$code.=<<___;
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-# ifdef __thumb2__
+# if defined(__thumb2__)
# define INST(a,b,c,d) .byte c,d|0xc,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d
@@ -591,14 +613,11 @@ $code.=<<___;
sha256_block_data_order_armv8:
.LARMv8:
vld1.32 {$ABCD,$EFGH},[$ctx]
-# ifdef __thumb2__
- adr $Ktbl,.LARMv8
- sub $Ktbl,$Ktbl,#.LARMv8-K256
-# else
- adrl $Ktbl,K256
-# endif
+ sub $Ktbl,$Ktbl,#256+32
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+ b .Loop_v8
+.align 4
.Loop_v8:
vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-c64xplus.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-c64xplus.pl
new file mode 100644
index 0000000000..3ab7d9b689
--- /dev/null
+++ b/deps/openssl/openssl/crypto/sha/asm/sha256-c64xplus.pl
@@ -0,0 +1,320 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA256 for C64x+.
+#
+# January 2012
+#
+# Performance is just below 10 cycles per processed byte, which is
+# almost 40% faster than compiler-generated code. Unroll is unlikely
+# to give more than ~8% improvement...
+#
+# !!! Note that this module uses AMR, which means that all interrupt
+# service routines are expected to preserve it and for own well-being
+# zero it upon entry.
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments
+ $K256="A3";
+
+($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14)
+ =map("A$_",(16..31));
+($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15)
+ =map("B$_",(16..31));
+
+($Xia,$Xib)=("A5","B5"); # circular/ring buffer
+ $CTXB=$t2e;
+
+($Xn,$X0,$K)=("B7","B8","B9");
+($Maj,$Ch)=($T2,"B6");
+
+$code.=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .nocmp
+ .asg sha256_block_data_order,_sha256_block_data_order
+ .endif
+
+ .asg B3,RA
+ .asg A15,FP
+ .asg B15,SP
+
+ .if .BIG_ENDIAN
+ .asg SWAP2,MV
+ .asg SWAP4,MV
+ .endif
+
+ .global _sha256_block_data_order
+_sha256_block_data_order:
+__sha256_block:
+ .asmfunc stack_usage(64)
+ MV $NUM,A0 ; reassign $NUM
+|| MVK -64,B0
+ [!A0] BNOP RA ; if ($NUM==0) return;
+|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64)
+|| [A0] MV SP,FP
+ [A0] ADDKPC __sha256_block,B2
+|| [A0] AND B0,SP,SP ; align stack at 64 bytes
+ .if __TI_EABI__
+ [A0] MVK 0x00404,B1
+|| [A0] MVKL \$PCR_OFFSET(K256,__sha256_block),$K256
+ [A0] MVKH 0x50000,B1
+|| [A0] MVKH \$PCR_OFFSET(K256,__sha256_block),$K256
+ .else
+ [A0] MVK 0x00404,B1
+|| [A0] MVKL (K256-__sha256_block),$K256
+ [A0] MVKH 0x50000,B1
+|| [A0] MVKH (K256-__sha256_block),$K256
+ .endif
+ [A0] MVC B1,AMR ; setup circular addressing
+|| [A0] MV SP,$Xia
+ [A0] MV SP,$Xib
+|| [A0] ADD B2,$K256,$K256
+|| [A0] MV $CTXA,$CTXB
+|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
+ LDW *${CTXA}[0],$A ; load ctx
+|| LDW *${CTXB}[4],$E
+ LDW *${CTXA}[1],$B
+|| LDW *${CTXB}[5],$F
+ LDW *${CTXA}[2],$C
+|| LDW *${CTXB}[6],$G
+ LDW *${CTXA}[3],$D
+|| LDW *${CTXB}[7],$H
+
+ LDNW *$INP++,$Xn ; pre-fetch input
+ LDW *$K256++,$K ; pre-fetch K256[0]
+ MVK 14,B0 ; loop counters
+ MVK 47,B1
+|| ADDAW $Xia,9,$Xia
+outerloop?:
+ SUB A0,1,A0
+|| MV $A,$Actx
+|| MV $E,$Ectx
+|| MVD $B,$Bctx
+|| MVD $F,$Fctx
+ MV $C,$Cctx
+|| MV $G,$Gctx
+|| MVD $D,$Dctx
+|| MVD $H,$Hctx
+|| SWAP4 $Xn,$X0
+
+ SPLOOPD 8 ; BODY_00_14
+|| MVC B0,ILC
+|| SWAP2 $X0,$X0
+
+ LDNW *$INP++,$Xn
+|| ROTL $A,30,$S0
+|| OR $A,$B,$Maj
+|| AND $A,$B,$t2a
+|| ROTL $E,26,$S1
+|| AND $F,$E,$Ch
+|| ANDN $G,$E,$t2e
+ ROTL $A,19,$t0a
+|| AND $C,$Maj,$Maj
+|| ROTL $E,21,$t0e
+|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
+ ROTL $A,10,$t1a
+|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
+|| ROTL $E,7,$t1e
+|| ADD $K,$H,$T1 ; T1 = h + K256[i]
+ ADD $X0,$T1,$T1 ; T1 += X[i];
+|| STW $X0,*$Xib++
+|| XOR $t0a,$S0,$S0
+|| XOR $t0e,$S1,$S1
+ XOR $t1a,$S0,$S0 ; Sigma0(a)
+|| XOR $t1e,$S1,$S1 ; Sigma1(e)
+|| LDW *$K256++,$K ; pre-fetch K256[i+1]
+|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
+ ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
+|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
+|| ROTL $G,0,$H ; h = g
+|| MV $F,$G ; g = f
+|| MV $X0,$X14
+|| SWAP4 $Xn,$X0
+ SWAP2 $X0,$X0
+|| MV $E,$F ; f = e
+|| ADD $D,$T1,$E ; e = d + T1
+|| MV $C,$D ; d = c
+ MV $B,$C ; c = b
+|| MV $A,$B ; b = a
+|| ADD $T1,$T2,$A ; a = T1 + T2
+ SPKERNEL
+
+ ROTL $A,30,$S0 ; BODY_15
+|| OR $A,$B,$Maj
+|| AND $A,$B,$t2a
+|| ROTL $E,26,$S1
+|| AND $F,$E,$Ch
+|| ANDN $G,$E,$t2e
+|| LDW *${Xib}[1],$Xn ; modulo-scheduled
+ ROTL $A,19,$t0a
+|| AND $C,$Maj,$Maj
+|| ROTL $E,21,$t0e
+|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
+|| LDW *${Xib}[2],$X1 ; modulo-scheduled
+ ROTL $A,10,$t1a
+|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
+|| ROTL $E,7,$t1e
+|| ADD $K,$H,$T1 ; T1 = h + K256[i]
+ ADD $X0,$T1,$T1 ; T1 += X[i];
+|| STW $X0,*$Xib++
+|| XOR $t0a,$S0,$S0
+|| XOR $t0e,$S1,$S1
+ XOR $t1a,$S0,$S0 ; Sigma0(a)
+|| XOR $t1e,$S1,$S1 ; Sigma1(e)
+|| LDW *$K256++,$K ; pre-fetch K256[i+1]
+|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
+ ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
+|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
+|| ROTL $G,0,$H ; h = g
+|| MV $F,$G ; g = f
+|| MV $X0,$X15
+ MV $E,$F ; f = e
+|| ADD $D,$T1,$E ; e = d + T1
+|| MV $C,$D ; d = c
+|| MV $Xn,$X0 ; modulo-scheduled
+|| LDW *$Xia,$X9 ; modulo-scheduled
+|| ROTL $X1,25,$t0e ; modulo-scheduled
+|| ROTL $X14,15,$t0a ; modulo-scheduled
+ SHRU $X1,3,$s0 ; modulo-scheduled
+|| SHRU $X14,10,$s1 ; modulo-scheduled
+|| ROTL $B,0,$C ; c = b
+|| MV $A,$B ; b = a
+|| ADD $T1,$T2,$A ; a = T1 + T2
+
+ SPLOOPD 10 ; BODY_16_63
+|| MVC B1,ILC
+|| ROTL $X1,14,$t1e ; modulo-scheduled
+|| ROTL $X14,13,$t1a ; modulo-scheduled
+
+ XOR $t0e,$s0,$s0
+|| XOR $t0a,$s1,$s1
+|| MV $X15,$X14
+|| MV $X1,$Xn
+ XOR $t1e,$s0,$s0 ; sigma0(X[i+1])
+|| XOR $t1a,$s1,$s1 ; sigma1(X[i+14])
+|| LDW *${Xib}[2],$X1 ; module-scheduled
+ ROTL $A,30,$S0
+|| OR $A,$B,$Maj
+|| AND $A,$B,$t2a
+|| ROTL $E,26,$S1
+|| AND $F,$E,$Ch
+|| ANDN $G,$E,$t2e
+|| ADD $X9,$X0,$X0 ; X[i] += X[i+9]
+ ROTL $A,19,$t0a
+|| AND $C,$Maj,$Maj
+|| ROTL $E,21,$t0e
+|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g)
+|| ADD $s0,$X0,$X0 ; X[i] += sigma1(X[i+1])
+ ROTL $A,10,$t1a
+|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b)
+|| ROTL $E,7,$t1e
+|| ADD $H,$K,$T1 ; T1 = h + K256[i]
+|| ADD $s1,$X0,$X0 ; X[i] += sigma1(X[i+14])
+ XOR $t0a,$S0,$S0
+|| XOR $t0e,$S1,$S1
+|| ADD $X0,$T1,$T1 ; T1 += X[i]
+|| STW $X0,*$Xib++
+ XOR $t1a,$S0,$S0 ; Sigma0(a)
+|| XOR $t1e,$S1,$S1 ; Sigma1(e)
+|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g)
+|| MV $X0,$X15
+|| ROTL $G,0,$H ; h = g
+|| LDW *$K256++,$K ; pre-fetch K256[i+1]
+ ADD $S1,$T1,$T1 ; T1 += Sigma1(e)
+|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c)
+|| MV $F,$G ; g = f
+|| MV $Xn,$X0 ; modulo-scheduled
+|| LDW *++$Xia,$X9 ; modulo-scheduled
+|| ROTL $X1,25,$t0e ; module-scheduled
+|| ROTL $X14,15,$t0a ; modulo-scheduled
+ ROTL $X1,14,$t1e ; modulo-scheduled
+|| ROTL $X14,13,$t1a ; modulo-scheduled
+|| MV $E,$F ; f = e
+|| ADD $D,$T1,$E ; e = d + T1
+|| MV $C,$D ; d = c
+|| MV $B,$C ; c = b
+ MV $A,$B ; b = a
+|| ADD $T1,$T2,$A ; a = T1 + T2
+|| SHRU $X1,3,$s0 ; modulo-scheduled
+|| SHRU $X14,10,$s1 ; modulo-scheduled
+ SPKERNEL
+
+ [A0] B outerloop?
+|| [A0] LDNW *$INP++,$Xn ; pre-fetch input
+|| [A0] ADDK -260,$K256 ; rewind K256
+|| ADD $Actx,$A,$A ; accumulate ctx
+|| ADD $Ectx,$E,$E
+|| ADD $Bctx,$B,$B
+ ADD $Fctx,$F,$F
+|| ADD $Cctx,$C,$C
+|| ADD $Gctx,$G,$G
+|| ADD $Dctx,$D,$D
+|| ADD $Hctx,$H,$H
+|| [A0] LDW *$K256++,$K ; pre-fetch K256[0]
+
+ [!A0] BNOP RA
+||[!A0] MV $CTXA,$CTXB
+ [!A0] MV FP,SP ; restore stack pointer
+||[!A0] LDW *FP[0],FP ; restore frame pointer
+ [!A0] STW $A,*${CTXA}[0] ; save ctx
+||[!A0] STW $E,*${CTXB}[4]
+||[!A0] MVK 0,B0
+ [!A0] STW $B,*${CTXA}[1]
+||[!A0] STW $F,*${CTXB}[5]
+||[!A0] MVC B0,AMR ; clear AMR
+ STW $C,*${CTXA}[2]
+|| STW $G,*${CTXB}[6]
+ STW $D,*${CTXA}[3]
+|| STW $H,*${CTXB}[7]
+ .endasmfunc
+
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
+ .sect ".const:sha_asm"
+ .endif
+ .align 128
+K256:
+ .uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .uword 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .uword 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .uword 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .uword 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .uword 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .uword 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .uword 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .uword 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .uword 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .uword 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .uword 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .uword 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .uword 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .uword 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .uword 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ .cstring "SHA256 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+
+___
+
+print $code;
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl
index 9770286b95..fbcd29f2e8 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -19,6 +26,7 @@
# Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103%
# Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82%
# Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170%
+# Skylake (18.9 +5.00=23.9)/n 7.70 8.17 +170%
# Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100%
#
# (i) multi-block CBC encrypt with 128-bit key;
@@ -63,7 +71,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$avx = ($2>=3.0) + ($2>3.0);
}
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
# void sha256_multi_block (
@@ -382,7 +390,7 @@ $code.=<<___;
jnz .Loop_grande
.Ldone:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
___
$code.=<<___ if ($win64);
movaps -0xb8(%rax),%xmm6
@@ -442,7 +450,7 @@ $code.=<<___;
lea K256_shaext+0x80(%rip),$Tbl
.Loop_grande_shaext:
- mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num
+ mov $num,`$REG_SZ*17+8`(%rsp) # original $num
xor $num,$num
___
for($i=0;$i<2;$i++) {
@@ -1061,7 +1069,7 @@ $code.=<<___;
jnz .Loop_grande_avx
.Ldone_avx:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
vzeroupper
___
$code.=<<___ if ($win64);
@@ -1237,7 +1245,7 @@ $code.=<<___;
#jnz .Loop_grande_avx2
.Ldone_avx2:
- mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp
+ mov `$REG_SZ*17`(%rsp),%rax # original %rsp
vzeroupper
___
$code.=<<___ if ($win64);
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl
index 2f6a202c37..3873934b69 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -29,6 +36,7 @@
# VIA Nano 91 - 52 33 14.7
# Atom 126 - 68 48(***) 14.7
# Silvermont 97 - 58 42(***) 17.5
+# Goldmont 80 - 48 19.5 12.0
#
# (*) whichever best applicable.
# (**) x86_64 assembler performance is presented for reference
@@ -50,6 +58,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output=pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
$sse2=0;
@@ -909,3 +920,5 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2
&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl
index fb7dc506ac..22b5a9d0b1 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl
@@ -1,10 +1,19 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.
@@ -34,16 +43,9 @@
# terms it's 22.6 cycles per byte, which is disappointing result.
# Technical writers asserted that 3-way S4 pipeline can sustain
# multiple NEON instructions per cycle, but dual NEON issue could
-# not be observed, and for NEON-only sequences IPC(*) was found to
-# be limited by 1:-( 0.33 and 0.66 were measured for sequences with
-# ILPs(*) of 1 and 2 respectively. This in turn means that you can
-# even find yourself striving, as I did here, for achieving IPC
-# adequate to one delivered by Cortex A8 [for reference, it's
-# 0.5 for ILP of 1, and 1 for higher ILPs].
-#
-# (*) ILP, instruction-level parallelism, how many instructions
-# *can* execute at the same time. IPC, instructions per cycle,
-# indicates how many instructions actually execute.
+# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# for further details. On side note Cortex-A15 processes one byte in
+# 16 cycles.
# Byte order [in]dependence. =========================================
#
@@ -55,8 +57,20 @@ $hi="HI";
$lo="LO";
# ====================================================================
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
$ctx="r0"; # parameter block
$inp="r1";
@@ -143,6 +157,9 @@ $code.=<<___;
teq $t0,#$magic
ldr $t3,[sp,#$Coff+0] @ c.lo
+#if __ARM_ARCH__>=7
+ it eq @ Thumb2 thing, sanity check in ARM
+#endif
orreq $Ktbl,$Ktbl,#1
@ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
@ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
@@ -180,7 +197,17 @@ $code.=<<___;
___
}
$code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
#ifdef __ARMEL__
# define LO 0
# define HI 4
@@ -192,7 +219,14 @@ $code=<<___;
#endif
.text
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+# define adrl adr
+#else
.code 32
+#endif
+
.type K512,%object
.align 5
K512:
@@ -237,9 +271,9 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha512_block_data_order
+.word OPENSSL_armcap_P-.Lsha512_block_data_order
.skip 32-4
#else
.skip 32
@@ -248,14 +282,22 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.global sha512_block_data_order
.type sha512_block_data_order,%function
sha512_block_data_order:
+.Lsha512_block_data_order:
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha512_block_data_order
- add $len,$inp,$len,lsl#7 @ len to point at the end of inp
-#if __ARM_MAX_ARCH__>=7
+#else
+ adr r3,.Lsha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#1
+#ifdef __APPLE__
+ ldr r12,[r12]
+#endif
+ tst r12,#ARMV7_NEON
bne .LNEON
#endif
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4-r12,lr}
sub $Ktbl,r3,#672 @ K512
sub sp,sp,#9*8
@@ -369,6 +411,9 @@ $code.=<<___;
___
&BODY_00_15(0x17);
$code.=<<___;
+#if __ARM_ARCH__>=7
+ ittt eq @ Thumb2 thing, sanity check in ARM
+#endif
ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
beq .L16_79
@@ -453,6 +498,7 @@ $code.=<<___;
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
+.size sha512_block_data_order,.-sha512_block_data_order
___
{
@@ -559,11 +605,15 @@ $code.=<<___;
.arch armv7-a
.fpu neon
+.global sha512_block_data_order_neon
+.type sha512_block_data_order_neon,%function
.align 4
+sha512_block_data_order_neon:
.LNEON:
dmb @ errata #451034 on early Cortex A8
- vstmdb sp!,{d8-d15} @ ABI specification says so
- sub $Ktbl,r3,#672 @ K512
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
+ adr $Ktbl,K512
+ VFP_ABI_PUSH
vldmia $ctx,{$A-$H} @ load context
.Loop_neon:
___
@@ -588,16 +638,16 @@ $code.=<<___;
sub $Ktbl,#640 @ rewind K512
bne .Loop_neon
- vldmia sp!,{d8-d15} @ epilogue
+ VFP_ABI_POP
ret @ bx lr
+.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif
___
}
$code.=<<___;
-.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm OPENSSL_armcap_P,4,4
#endif
___
@@ -605,5 +655,14 @@ ___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
print $code;
close STDOUT; # enforce flush
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl
index f7b36b986a..c1aaf778f4 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -18,6 +25,7 @@
# Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
# Denver 2.01 10.5 (+26%) 6.70 (+8%)
# X-Gene 20.0 (+100%) 12.8 (+300%(***))
+# Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
#
# (*) Software SHA256 results are of lesser relevance, presented
# mostly for informational purposes.
@@ -31,7 +39,14 @@
$flavour=shift;
$output=shift;
-open STDOUT,">$output";
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
if ($output =~ /512/) {
$BITS=512;
@@ -155,13 +170,18 @@ $code.=<<___;
.text
+.extern OPENSSL_armcap_P
.globl $func
.type $func,%function
.align 6
$func:
___
$code.=<<___ if ($SZ==4);
+#ifdef __ILP32__
+ ldrsw x16,.LOPENSSL_armcap_P
+#else
ldr x16,.LOPENSSL_armcap_P
+#endif
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
@@ -184,7 +204,7 @@ $code.=<<___;
ldp $E,$F,[$ctx,#4*$SZ]
add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
ldp $G,$H,[$ctx,#6*$SZ]
- adr $Ktbl,K$BITS
+ adr $Ktbl,.LK$BITS
stp $ctx,$num,[x29,#96]
.Loop:
@@ -234,8 +254,8 @@ $code.=<<___;
.size $func,.-$func
.align 6
-.type K$BITS,%object
-K$BITS:
+.type .LK$BITS,%object
+.LK$BITS:
___
$code.=<<___ if ($SZ==8);
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
@@ -300,10 +320,14 @@ $code.=<<___ if ($SZ==4);
.long 0 //terminator
___
$code.=<<___;
-.size K$BITS,.-K$BITS
+.size .LK$BITS,.-.LK$BITS
.align 3
.LOPENSSL_armcap_P:
+#ifdef __ILP32__
+ .long OPENSSL_armcap_P-.
+#else
.quad OPENSSL_armcap_P-.
+#endif
.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
@@ -325,7 +349,7 @@ sha256_block_armv8:
add x29,sp,#0
ld1.32 {$ABCD,$EFGH},[$ctx]
- adr $Ktbl,K256
+ adr $Ktbl,.LK256
.Loop_hw:
ld1 {@MSG[0]-@MSG[3]},[$inp],#64
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-c64xplus.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-c64xplus.pl
new file mode 100644
index 0000000000..9ebfc92e23
--- /dev/null
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-c64xplus.pl
@@ -0,0 +1,438 @@
+#! /usr/bin/env perl
+# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA512 for C64x+.
+#
+# January 2012
+#
+# Performance is 19 cycles per processed byte. Compared to block
+# transform function from sha512.c compiled with cl6x with -mv6400+
+# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
+# Loop unroll won't make it, this implementation, any faster, because
+# it's effectively dominated by SHRU||SHL pairs and you can't schedule
+# more of them.
+#
+# !!! Note that this module uses AMR, which means that all interrupt
+# service routines are expected to preserve it and for own well-being
+# zero it upon entry.
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments
+ $K512="A3";
+
+($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
+ $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));
+($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
+ $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));
+
+($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));
+($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));
+($T1hi, $T2hi)= ("A6","A7");
+($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");
+($Khi,$Klo)=("A9","A8");
+($MAJhi,$MAJlo)=($T2hi,$T2lo);
+($t1hi,$t1lo)=($Khi,"B2");
+ $CTXB=$t1lo;
+
+($Xihi,$Xilo)=("A5","B5"); # circular/ring buffer
+
+$code.=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .nocmp
+ .asg sha512_block_data_order,_sha512_block_data_order
+ .endif
+
+ .asg B3,RA
+ .asg A15,FP
+ .asg B15,SP
+
+ .if .BIG_ENDIAN
+ .asg $Khi,KHI
+ .asg $Klo,KLO
+ .else
+ .asg $Khi,KLO
+ .asg $Klo,KHI
+ .endif
+
+ .global _sha512_block_data_order
+_sha512_block_data_order:
+__sha512_block:
+ .asmfunc stack_usage(40+128)
+ MV $NUM,A0 ; reassign $NUM
+|| MVK -128,B0
+ [!A0] BNOP RA ; if ($NUM==0) return;
+|| [A0] STW FP,*SP--(40) ; save frame pointer
+|| [A0] MV SP,FP
+ [A0] STDW B13:B12,*SP[4]
+|| [A0] MVK 0x00404,B1
+ [A0] STDW B11:B10,*SP[3]
+|| [A0] STDW A13:A12,*FP[-3]
+|| [A0] MVKH 0x60000,B1
+ [A0] STDW A11:A10,*SP[1]
+|| [A0] MVC B1,AMR ; setup circular addressing
+|| [A0] ADD B0,SP,SP ; alloca(128)
+ .if __TI_EABI__
+ [A0] AND B0,SP,SP ; align stack at 128 bytes
+|| [A0] ADDKPC __sha512_block,B1
+|| [A0] MVKL \$PCR_OFFSET(K512,__sha512_block),$K512
+ [A0] MVKH \$PCR_OFFSET(K512,__sha512_block),$K512
+|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
+ .else
+ [A0] AND B0,SP,SP ; align stack at 128 bytes
+|| [A0] ADDKPC __sha512_block,B1
+|| [A0] MVKL (K512-__sha512_block),$K512
+ [A0] MVKH (K512-__sha512_block),$K512
+|| [A0] SUBAW SP,2,SP ; reserve two words above buffer
+ .endif
+ ADDAW SP,3,$Xilo
+ ADDAW SP,2,$Xihi
+
+|| MV $CTXA,$CTXB
+ LDW *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx
+|| LDW *${CTXB}[1^.LITTLE_ENDIAN],$Alo
+|| ADD B1,$K512,$K512
+ LDW *${CTXA}[2^.LITTLE_ENDIAN],$Bhi
+|| LDW *${CTXB}[3^.LITTLE_ENDIAN],$Blo
+ LDW *${CTXA}[4^.LITTLE_ENDIAN],$Chi
+|| LDW *${CTXB}[5^.LITTLE_ENDIAN],$Clo
+ LDW *${CTXA}[6^.LITTLE_ENDIAN],$Dhi
+|| LDW *${CTXB}[7^.LITTLE_ENDIAN],$Dlo
+ LDW *${CTXA}[8^.LITTLE_ENDIAN],$Ehi
+|| LDW *${CTXB}[9^.LITTLE_ENDIAN],$Elo
+ LDW *${CTXA}[10^.LITTLE_ENDIAN],$Fhi
+|| LDW *${CTXB}[11^.LITTLE_ENDIAN],$Flo
+ LDW *${CTXA}[12^.LITTLE_ENDIAN],$Ghi
+|| LDW *${CTXB}[13^.LITTLE_ENDIAN],$Glo
+ LDW *${CTXA}[14^.LITTLE_ENDIAN],$Hhi
+|| LDW *${CTXB}[15^.LITTLE_ENDIAN],$Hlo
+
+ LDNDW *$INP++,B11:B10 ; pre-fetch input
+ LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0]
+outerloop?:
+ MVK 15,B0 ; loop counters
+|| MVK 64,B1
+|| SUB A0,1,A0
+ MV $Ahi,$Actxhi
+|| MV $Alo,$Actxlo
+|| MV $Bhi,$Bctxhi
+|| MV $Blo,$Bctxlo
+|| MV $Chi,$Cctxhi
+|| MV $Clo,$Cctxlo
+|| MVD $Dhi,$Dctxhi
+|| MVD $Dlo,$Dctxlo
+ MV $Ehi,$Ectxhi
+|| MV $Elo,$Ectxlo
+|| MV $Fhi,$Fctxhi
+|| MV $Flo,$Fctxlo
+|| MV $Ghi,$Gctxhi
+|| MV $Glo,$Gctxlo
+|| MVD $Hhi,$Hctxhi
+|| MVD $Hlo,$Hctxlo
+loop0_15?:
+ .if .BIG_ENDIAN
+ MV B11,$T1hi
+|| MV B10,$T1lo
+ .else
+ SWAP4 B10,$T1hi
+|| SWAP4 B11,$T1lo
+ SWAP2 $T1hi,$T1hi
+|| SWAP2 $T1lo,$T1lo
+ .endif
+loop16_79?:
+ STW $T1hi,*$Xihi++[2]
+|| STW $T1lo,*$Xilo++[2] ; X[i] = T1
+|| ADD $Hhi,$T1hi,$T1hi
+|| ADDU $Hlo,$T1lo,$T1carry:$T1lo ; T1 += h
+|| SHRU $Ehi,14,$S1hi
+|| SHL $Ehi,32-14,$S1lo
+ XOR $Fhi,$Ghi,$CHhi
+|| XOR $Flo,$Glo,$CHlo
+|| ADD KHI,$T1hi,$T1hi
+|| ADDU KLO,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += K512[i]
+|| SHRU $Elo,14,$t0lo
+|| SHL $Elo,32-14,$t0hi
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| AND $Ehi,$CHhi,$CHhi
+|| AND $Elo,$CHlo,$CHlo
+|| ROTL $Ghi,0,$Hhi
+|| ROTL $Glo,0,$Hlo ; h = g
+|| SHRU $Ehi,18,$t0hi
+|| SHL $Ehi,32-18,$t0lo
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| XOR $Ghi,$CHhi,$CHhi
+|| XOR $Glo,$CHlo,$CHlo ; Ch(e,f,g) = ((f^g)&e)^g
+|| ROTL $Fhi,0,$Ghi
+|| ROTL $Flo,0,$Glo ; g = f
+|| SHRU $Elo,18,$t0lo
+|| SHL $Elo,32-18,$t0hi
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| OR $Ahi,$Bhi,$MAJhi
+|| OR $Alo,$Blo,$MAJlo
+|| ROTL $Ehi,0,$Fhi
+|| ROTL $Elo,0,$Flo ; f = e
+|| SHRU $Ehi,41-32,$t0lo
+|| SHL $Ehi,64-41,$t0hi
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| AND $Chi,$MAJhi,$MAJhi
+|| AND $Clo,$MAJlo,$MAJlo
+|| ROTL $Dhi,0,$Ehi
+|| ROTL $Dlo,0,$Elo ; e = d
+|| SHRU $Elo,41-32,$t0hi
+|| SHL $Elo,64-41,$t0lo
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo ; Sigma1(e)
+|| AND $Ahi,$Bhi,$t1hi
+|| AND $Alo,$Blo,$t1lo
+|| ROTL $Chi,0,$Dhi
+|| ROTL $Clo,0,$Dlo ; d = c
+|| SHRU $Ahi,28,$S0hi
+|| SHL $Ahi,32-28,$S0lo
+ OR $t1hi,$MAJhi,$MAJhi
+|| OR $t1lo,$MAJlo,$MAJlo ; Maj(a,b,c) = ((a|b)&c)|(a&b)
+|| ADD $CHhi,$T1hi,$T1hi
+|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Ch(e,f,g)
+|| ROTL $Bhi,0,$Chi
+|| ROTL $Blo,0,$Clo ; c = b
+|| SHRU $Alo,28,$t0lo
+|| SHL $Alo,32-28,$t0hi
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| ADD $S1hi,$T1hi,$T1hi
+|| ADDU $S1lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Sigma1(e)
+|| ROTL $Ahi,0,$Bhi
+|| ROTL $Alo,0,$Blo ; b = a
+|| SHRU $Ahi,34-32,$t0lo
+|| SHL $Ahi,64-34,$t0hi
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| ADD $MAJhi,$T1hi,$T2hi
+|| ADDU $MAJlo,$T1carry:$T1lo,$T2carry:$T2lo ; T2 = T1+Maj(a,b,c)
+|| SHRU $Alo,34-32,$t0hi
+|| SHL $Alo,64-34,$t0lo
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| ADD $Ehi,$T1hi,$T1hi
+|| ADDU $Elo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += e
+|| [B0] BNOP loop0_15?
+|| SHRU $Ahi,39-32,$t0lo
+|| SHL $Ahi,64-39,$t0hi
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| [B0] LDNDW *$INP++,B11:B10 ; pre-fetch input
+||[!B1] BNOP break?
+|| SHRU $Alo,39-32,$t0hi
+|| SHL $Alo,64-39,$t0lo
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo ; Sigma0(a)
+|| ADD $T1carry,$T1hi,$Ehi
+|| MV $T1lo,$Elo ; e = T1
+||[!B0] LDW *${Xihi}[28],$T1hi
+||[!B0] LDW *${Xilo}[28],$T1lo ; X[i+14]
+ ADD $S0hi,$T2hi,$T2hi
+|| ADDU $S0lo,$T2carry:$T2lo,$T2carry:$T2lo ; T2 += Sigma0(a)
+|| [B1] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[i]
+ NOP ; avoid cross-path stall
+ ADD $T2carry,$T2hi,$Ahi
+|| MV $T2lo,$Alo ; a = T2
+|| [B0] SUB B0,1,B0
+;;===== branch to loop00_15? is taken here
+ NOP
+;;===== branch to break? is taken here
+ LDW *${Xihi}[2],$T2hi
+|| LDW *${Xilo}[2],$T2lo ; X[i+1]
+|| SHRU $T1hi,19,$S1hi
+|| SHL $T1hi,32-19,$S1lo
+ SHRU $T1lo,19,$t0lo
+|| SHL $T1lo,32-19,$t0hi
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| SHRU $T1hi,61-32,$t0lo
+|| SHL $T1hi,64-61,$t0hi
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| SHRU $T1lo,61-32,$t0hi
+|| SHL $T1lo,64-61,$t0lo
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| SHRU $T1hi,6,$t0hi
+|| SHL $T1hi,32-6,$t0lo
+ XOR $t0hi,$S1hi,$S1hi
+|| XOR $t0lo,$S1lo,$S1lo
+|| SHRU $T1lo,6,$t0lo
+|| LDW *${Xihi}[18],$T1hi
+|| LDW *${Xilo}[18],$T1lo ; X[i+9]
+ XOR $t0lo,$S1lo,$S1lo ; sigma1(Xi[i+14])
+
+|| LDW *${Xihi}[0],$CHhi
+|| LDW *${Xilo}[0],$CHlo ; X[i]
+|| SHRU $T2hi,1,$S0hi
+|| SHL $T2hi,32-1,$S0lo
+ SHRU $T2lo,1,$t0lo
+|| SHL $T2lo,32-1,$t0hi
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| SHRU $T2hi,8,$t0hi
+|| SHL $T2hi,32-8,$t0lo
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| SHRU $T2lo,8,$t0lo
+|| SHL $T2lo,32-8,$t0hi
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| ADD $S1hi,$T1hi,$T1hi
+|| ADDU $S1lo,$T1lo,$T1carry:$T1lo ; T1 = X[i+9]+sigma1()
+|| [B1] BNOP loop16_79?
+|| SHRU $T2hi,7,$t0hi
+|| SHL $T2hi,32-7,$t0lo
+ XOR $t0hi,$S0hi,$S0hi
+|| XOR $t0lo,$S0lo,$S0lo
+|| ADD $CHhi,$T1hi,$T1hi
+|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += X[i]
+|| SHRU $T2lo,7,$t0lo
+ XOR $t0lo,$S0lo,$S0lo ; sigma0(Xi[i+1]
+
+ ADD $S0hi,$T1hi,$T1hi
+|| ADDU $S0lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += sigma0()
+|| [B1] SUB B1,1,B1
+ NOP ; avoid cross-path stall
+ ADD $T1carry,$T1hi,$T1hi
+;;===== branch to loop16_79? is taken here
+
+break?:
+ ADD $Ahi,$Actxhi,$Ahi ; accumulate ctx
+|| ADDU $Alo,$Actxlo,$Actxlo:$Alo
+|| [A0] LDNDW *$INP++,B11:B10 ; pre-fetch input
+|| [A0] ADDK -640,$K512 ; rewind pointer to K512
+ ADD $Bhi,$Bctxhi,$Bhi
+|| ADDU $Blo,$Bctxlo,$Bctxlo:$Blo
+|| [A0] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0]
+ ADD $Chi,$Cctxhi,$Chi
+|| ADDU $Clo,$Cctxlo,$Cctxlo:$Clo
+|| ADD $Actxlo,$Ahi,$Ahi
+||[!A0] MV $CTXA,$CTXB
+ ADD $Dhi,$Dctxhi,$Dhi
+|| ADDU $Dlo,$Dctxlo,$Dctxlo:$Dlo
+|| ADD $Bctxlo,$Bhi,$Bhi
+||[!A0] STW $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx
+||[!A0] STW $Alo,*${CTXB}[1^.LITTLE_ENDIAN]
+ ADD $Ehi,$Ectxhi,$Ehi
+|| ADDU $Elo,$Ectxlo,$Ectxlo:$Elo
+|| ADD $Cctxlo,$Chi,$Chi
+|| [A0] BNOP outerloop?
+||[!A0] STW $Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
+||[!A0] STW $Blo,*${CTXB}[3^.LITTLE_ENDIAN]
+ ADD $Fhi,$Fctxhi,$Fhi
+|| ADDU $Flo,$Fctxlo,$Fctxlo:$Flo
+|| ADD $Dctxlo,$Dhi,$Dhi
+||[!A0] STW $Chi,*${CTXA}[4^.LITTLE_ENDIAN]
+||[!A0] STW $Clo,*${CTXB}[5^.LITTLE_ENDIAN]
+ ADD $Ghi,$Gctxhi,$Ghi
+|| ADDU $Glo,$Gctxlo,$Gctxlo:$Glo
+|| ADD $Ectxlo,$Ehi,$Ehi
+||[!A0] STW $Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
+||[!A0] STW $Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
+ ADD $Hhi,$Hctxhi,$Hhi
+|| ADDU $Hlo,$Hctxlo,$Hctxlo:$Hlo
+|| ADD $Fctxlo,$Fhi,$Fhi
+||[!A0] STW $Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
+||[!A0] STW $Elo,*${CTXB}[9^.LITTLE_ENDIAN]
+ ADD $Gctxlo,$Ghi,$Ghi
+||[!A0] STW $Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
+||[!A0] STW $Flo,*${CTXB}[11^.LITTLE_ENDIAN]
+ ADD $Hctxlo,$Hhi,$Hhi
+||[!A0] STW $Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
+||[!A0] STW $Glo,*${CTXB}[13^.LITTLE_ENDIAN]
+;;===== branch to outerloop? is taken here
+
+ STW $Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
+|| STW $Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
+|| MVK -40,B0
+ ADD FP,B0,SP ; destroy circular buffer
+|| LDDW *FP[-4],A11:A10
+ LDDW *SP[2],A13:A12
+|| LDDW *FP[-2],B11:B10
+ LDDW *SP[4],B13:B12
+|| BNOP RA
+ LDW *++SP(40),FP ; restore frame pointer
+ MVK 0,B0
+ MVC B0,AMR ; clear AMR
+ NOP 2 ; wait till FP is committed
+ .endasmfunc
+
+ .if __TI_EABI__
+ .sect ".text:sha_asm.const"
+ .else
+ .sect ".const:sha_asm"
+ .endif
+ .align 128
+K512:
+ .uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
+ .uword 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
+ .uword 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
+ .uword 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
+ .uword 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
+ .uword 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
+ .uword 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
+ .uword 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
+ .uword 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
+ .uword 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
+ .uword 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
+ .uword 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
+ .uword 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
+ .uword 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
+ .uword 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
+ .uword 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
+ .uword 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
+ .uword 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
+ .uword 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
+ .uword 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
+ .uword 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
+ .uword 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
+ .uword 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
+ .uword 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
+ .uword 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
+ .uword 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
+ .uword 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
+ .uword 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
+ .uword 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
+ .uword 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
+ .uword 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
+ .uword 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
+ .uword 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
+ .uword 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
+ .uword 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
+ .uword 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
+ .uword 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
+ .uword 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
+ .uword 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
+ .uword 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
+ .cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+___
+
+print $code;
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl
index 59f889a095..356a46aced 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -68,7 +75,7 @@
# To generate code, pass the file name with either 256 or 512 in its
# name and compiler flags.
-$output=shift;
+$output=pop;
if ($output =~ /512.*\.[s|asm]/) {
$SZ=8;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl
index b468cfb456..5c2d23faaf 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -52,6 +59,7 @@
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
+ $PTR_LA="dla";
$PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32
$REG_S="sd";
@@ -59,6 +67,7 @@ if ($flavour =~ /64|n32/i) {
$PTR_SLL="dsll"; # incidentally works even on n32
$SZREG=8;
} else {
+ $PTR_LA="la";
$PTR_ADD="add";
$PTR_SUB="sub";
$REG_S="sw";
@@ -74,7 +83,7 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
-for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
+for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
open STDOUT,">$output";
if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
@@ -286,7 +295,7 @@ ___
}
$FRAMESIZE=16*$SZ+16*$SZREG;
-$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
$code.=<<___;
#ifdef OPENSSL_FIPSCANISTER
@@ -343,7 +352,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
___
$code.=<<___;
.set reorder
- la $Ktbl,K${label} # PIC-ified 'load address'
+ $PTR_LA $Ktbl,K${label} # PIC-ified 'load address'
$LD $A,0*$SZ($ctx) # load context
$LD $B,1*$SZ($ctx)
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-parisc.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-parisc.pl
index 6cad72e255..fcb6157902 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-parisc.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-parisc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl
index 17fdc6e8e5..fe95b01509 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-s390x.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-s390x.pl
index 9c10e4e9ee..427d6f8252 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-s390x.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-s390x.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -33,7 +40,7 @@
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
-# remains z/Architecture specific. On z900 SHA256 was measured to
+# remains z/Architecture specific. On z990 SHA256 was measured to
# perform 2.4x and SHA512 - 13x better than code generated by gcc 4.3.
$flavour = shift;
@@ -64,7 +71,7 @@ $tbl="%r13";
$T1="%r14";
$sp="%r15";
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
if ($output =~ /512/) {
@@ -237,9 +244,6 @@ $Func:
___
$code.=<<___ if ($kimdfunc);
larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lsoftware
lg %r0,16(%r1) # check kimd capabilities
tmhh %r0,`0x8000>>$kimdfunc`
jz .Lsoftware
@@ -308,7 +312,6 @@ $code.=<<___;
br %r14
.size $Func,.-$Func
.string "SHA${label} block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,80,8
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl
index 5a9c15d1d3..4a1ce5fe3e 100644
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -49,7 +56,7 @@
# saturates at 11.5x single-process result on 8-core processor, or
# ~11/16GBps per 2.85GHz socket.
-$output=shift;
+$output=pop;
open STDOUT,">$output";
if ($output =~ /512/) {
@@ -791,7 +798,7 @@ ___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
-# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis {
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl
index 78e445f3fe..c9b7b28123 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -34,7 +41,7 @@
# level parallelism, on a given CPU implementation in this case.
#
# Special note on Intel EM64T. While Opteron CPU exhibits perfect
-# perfromance ratio of 1.5 between 64- and 32-bit flavors [see above],
+# performance ratio of 1.5 between 64- and 32-bit flavors [see above],
# [currently available] EM64T CPUs apparently are far from it. On the
# contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
# sha256_block:-( This is presumably because 64-bit shifts/rotates
@@ -86,12 +93,14 @@
# Sandy Bridge 17.4 14.2(+23%) 11.6(+50%(**)) 11.2 8.10(+38%(**))
# Ivy Bridge 12.6 10.5(+20%) 10.3(+22%) 8.17 7.22(+13%)
# Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%)
+# Skylake 11.4 9.03(+26%) 7.70(+48%) 7.25 5.20(+40%)
# Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%)
# VIA Nano 23.0 16.5(+39%) - 14.7 -
# Atom 23.0 18.9(+22%) - 14.7 -
# Silvermont 27.4 20.6(+33%) - 17.5 -
+# Goldmont 18.9 14.3(+32%) 4.16(+350%) 12.0 -
#
-# (*) whichever best applicable;
+# (*) whichever best applicable, including SHAEXT;
# (**) switch from ror to shrd stands for fair share of improvement;
# (***) execution time is fully determined by remaining integer-only
# part, body_00_15; reducing the amount of SIMD instructions
@@ -131,7 +140,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$shaext=1; ### set to zero if compiling for 1.0.1
$avx=1 if (!$shaext && $avx);
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
if ($output =~ /512/) {
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl
index 47189502c6..4d3d3b2f8c 100755
--- a/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl
+++ b/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL