Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s')
-rw-r--r--	deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s	152
1 file changed, 112 insertions(+), 40 deletions(-)
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
index 05e973612b..e43cdd7040 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
@@ -5,6 +5,7 @@
.p2align 4
_sha256_block_data_order:
+
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@@ -21,13 +22,20 @@ _sha256_block_data_order:
je L$avx_shortcut
testl $512,%r10d
jnz L$ssse3_shortcut
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -35,7 +43,8 @@ _sha256_block_data_order:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue:
movl 0(%rdi),%eax
@@ -1699,17 +1708,26 @@ L$rounds_16_xx:
movl %r11d,28(%rdi)
jb L$loop
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 6
K256:
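The same change repeats in each of the four code paths (base above, then SSSE3, AVX, and AVX2 below). The old prologue captured %rsp after the six pushes (movq %rsp,%r11) and the epilogue climbed upward from it (movq (%rsi),%r15 ... leaq 48(%rsi),%rsp); the new prologue captures %rsp before any push (movq %rsp,%rax), so the epilogue indexes downward from the caller's stack pointer and returns to the caller frame with a single leaq (%rsi),%rsp. The spill slot itself is unchanged, since 64+24(%rsp) and 88(%rsp) name the same location. The blank added lines are the spots where the ELF flavours of this generated file carry .cfi_* unwind annotations; the darwin64 perlasm flavour emits them as empty lines. A minimal standalone sketch of the new convention, with an illustrative _frame_demo label and a 32-byte local frame (neither taken from the diff):

.p2align	4
_frame_demo:
	movq	%rsp,%rax		# snapshot caller %rsp before any push
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$32,%rsp		# local frame; 32 bytes is illustrative
	movq	%rax,24(%rsp)		# stash caller %rsp in the frame

	# ... body: %rbx, %rbp, %r12-%r15 are all free here ...

	movq	24(%rsp),%rsi		# %rsi = caller %rsp
	movq	-48(%rsi),%r15		# the six pushes sit just below it,
	movq	-40(%rsi),%r14		# at fixed offsets -8 ... -48
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp		# back to the caller frame in one step
	.byte	0xf3,0xc3		# repz ret, as in the file itself

Because the restore keys off the caller's %rsp rather than off the frame pointer plus 48, the epilogue no longer depends on the local frame size, which is what lets the AVX2 path realign %rsp (see the note at the end of the diff).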
@@ -1963,14 +1981,22 @@ L$oop_shaext:
.p2align 6
sha256_block_data_order_ssse3:
+
L$ssse3_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -1978,7 +2004,8 @@ L$ssse3_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_ssse3:
movl 0(%rdi),%eax
@@ -3044,28 +3071,45 @@ L$ssse3_00_47:
movl %r11d,28(%rdi)
jb L$loop_ssse3
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_ssse3:
.byte 0xf3,0xc3
+
.p2align 6
sha256_block_data_order_avx:
+
L$avx_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -3073,7 +3117,8 @@ L$avx_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_avx:
vzeroupper
@@ -4100,29 +4145,46 @@ L$avx_00_47:
movl %r11d,28(%rdi)
jb L$loop_avx
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 6
sha256_block_data_order_avx2:
+
L$avx2_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
subq $544,%rsp
shlq $4,%rdx
andq $-1024,%rsp
@@ -4131,7 +4193,8 @@ L$avx2_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_avx2:
vzeroupper
@@ -5344,15 +5407,24 @@ L$ower_avx2:
L$done_avx2:
leaq (%rbp),%rsp
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+
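The AVX2 path adds one wrinkle: it realigns the stack (subq $544,%rsp; andq $-1024,%rsp), so the distance from %rsp back to the caller's frame is no longer a constant, and the 88(%rsp) slot only becomes addressable again once %rsp has been rewound to the frame base; that is why L$done_avx2 executes leaq (%rbp),%rsp before the restore sequence. A condensed sketch of that shape, with the caveat that the explicit movq %rsp,%rbp anchor below is illustrative (the generated code establishes %rbp differently):

.p2align	4
_aligned_frame_demo:
	movq	%rsp,%rax		# caller %rsp
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$544,%rsp
	andq	$-1024,%rsp		# align scratch; offset to %rax now varies
	movq	%rax,88(%rsp)		# saved caller %rsp, inside the aligned area
	movq	%rsp,%rbp		# illustrative frame anchor

	# ... body may adjust %rsp; %rbp still finds the frame base ...

	leaq	(%rbp),%rsp		# as at L$done_avx2: rewind first
	movq	88(%rsp),%rsi		# now the slot is reachable
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp
	.byte	0xf3,0xc3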