Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha')
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/keccak1600-x86_64.s  492
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-mb-x86_64.s      46
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-x86_64.s        164
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-mb-x86_64.s    46
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s      152
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha512-x86_64.s      152
6 files changed, 917 insertions, 135 deletions
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/keccak1600-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/keccak1600-x86_64.s
new file mode 100644
index 0000000000..ec096c5ab0
--- /dev/null
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/keccak1600-x86_64.s
@@ -0,0 +1,492 @@
+.text
+
+
+.p2align 5
+__KeccakF1600:
+ movq 60(%rdi),%rax
+ movq 68(%rdi),%rbx
+ movq 76(%rdi),%rcx
+ movq 84(%rdi),%rdx
+ movq 92(%rdi),%rbp
+ jmp L$oop
+
+.p2align 5
+L$oop:
+ movq -100(%rdi),%r8
+ movq -52(%rdi),%r9
+ movq -4(%rdi),%r10
+ movq 44(%rdi),%r11
+
+ xorq -84(%rdi),%rcx
+ xorq -76(%rdi),%rdx
+ xorq %r8,%rax
+ xorq -92(%rdi),%rbx
+ xorq -44(%rdi),%rcx
+ xorq -60(%rdi),%rax
+ movq %rbp,%r12
+ xorq -68(%rdi),%rbp
+
+ xorq %r10,%rcx
+ xorq -20(%rdi),%rax
+ xorq -36(%rdi),%rdx
+ xorq %r9,%rbx
+ xorq -28(%rdi),%rbp
+
+ xorq 36(%rdi),%rcx
+ xorq 20(%rdi),%rax
+ xorq 4(%rdi),%rdx
+ xorq -12(%rdi),%rbx
+ xorq 12(%rdi),%rbp
+
+ movq %rcx,%r13
+ rolq $1,%rcx
+ xorq %rax,%rcx
+ xorq %r11,%rdx
+
+ rolq $1,%rax
+ xorq %rdx,%rax
+ xorq 28(%rdi),%rbx
+
+ rolq $1,%rdx
+ xorq %rbx,%rdx
+ xorq 52(%rdi),%rbp
+
+ rolq $1,%rbx
+ xorq %rbp,%rbx
+
+ rolq $1,%rbp
+ xorq %r13,%rbp
+ xorq %rcx,%r9
+ xorq %rdx,%r10
+ rolq $44,%r9
+ xorq %rbp,%r11
+ xorq %rax,%r12
+ rolq $43,%r10
+ xorq %rbx,%r8
+ movq %r9,%r13
+ rolq $21,%r11
+ orq %r10,%r9
+ xorq %r8,%r9
+ rolq $14,%r12
+
+ xorq (%r15),%r9
+ leaq 8(%r15),%r15
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq %r9,-100(%rsi)
+ xorq %r10,%r12
+ notq %r10
+ movq %r12,-84(%rsi)
+
+ orq %r11,%r10
+ movq 76(%rdi),%r12
+ xorq %r13,%r10
+ movq %r10,-92(%rsi)
+
+ andq %r8,%r13
+ movq -28(%rdi),%r9
+ xorq %r14,%r13
+ movq -20(%rdi),%r10
+ movq %r13,-68(%rsi)
+
+ orq %r8,%r14
+ movq -76(%rdi),%r8
+ xorq %r11,%r14
+ movq 28(%rdi),%r11
+ movq %r14,-76(%rsi)
+
+
+ xorq %rbp,%r8
+ xorq %rdx,%r12
+ rolq $28,%r8
+ xorq %rcx,%r11
+ xorq %rax,%r9
+ rolq $61,%r12
+ rolq $45,%r11
+ xorq %rbx,%r10
+ rolq $20,%r9
+ movq %r8,%r13
+ orq %r12,%r8
+ rolq $3,%r10
+
+ xorq %r11,%r8
+ movq %r8,-36(%rsi)
+
+ movq %r9,%r14
+ andq %r13,%r9
+ movq -92(%rdi),%r8
+ xorq %r12,%r9
+ notq %r12
+ movq %r9,-28(%rsi)
+
+ orq %r11,%r12
+ movq -44(%rdi),%r9
+ xorq %r10,%r12
+ movq %r12,-44(%rsi)
+
+ andq %r10,%r11
+ movq 60(%rdi),%r12
+ xorq %r14,%r11
+ movq %r11,-52(%rsi)
+
+ orq %r10,%r14
+ movq 4(%rdi),%r10
+ xorq %r13,%r14
+ movq 52(%rdi),%r11
+ movq %r14,-60(%rsi)
+
+
+ xorq %rbp,%r10
+ xorq %rax,%r11
+ rolq $25,%r10
+ xorq %rdx,%r9
+ rolq $8,%r11
+ xorq %rbx,%r12
+ rolq $6,%r9
+ xorq %rcx,%r8
+ rolq $18,%r12
+ movq %r10,%r13
+ andq %r11,%r10
+ rolq $1,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,-12(%rsi)
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq -12(%rdi),%r10
+ xorq %r13,%r12
+ movq %r12,-4(%rsi)
+
+ orq %r9,%r13
+ movq 84(%rdi),%r12
+ xorq %r8,%r13
+ movq %r13,-20(%rsi)
+
+ andq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,12(%rsi)
+
+ orq %r8,%r14
+ movq -60(%rdi),%r9
+ xorq %r11,%r14
+ movq 36(%rdi),%r11
+ movq %r14,4(%rsi)
+
+
+ movq -68(%rdi),%r8
+
+ xorq %rcx,%r10
+ xorq %rdx,%r11
+ rolq $10,%r10
+ xorq %rbx,%r9
+ rolq $15,%r11
+ xorq %rbp,%r12
+ rolq $36,%r9
+ xorq %rax,%r8
+ rolq $56,%r12
+ movq %r10,%r13
+ orq %r11,%r10
+ rolq $27,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,28(%rsi)
+
+ movq %r12,%r14
+ orq %r11,%r12
+ xorq %r13,%r12
+ movq %r12,36(%rsi)
+
+ andq %r9,%r13
+ xorq %r8,%r13
+ movq %r13,20(%rsi)
+
+ orq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,52(%rsi)
+
+ andq %r14,%r8
+ xorq %r11,%r8
+ movq %r8,44(%rsi)
+
+
+ xorq -84(%rdi),%rdx
+ xorq -36(%rdi),%rbp
+ rolq $62,%rdx
+ xorq 68(%rdi),%rcx
+ rolq $55,%rbp
+ xorq 12(%rdi),%rax
+ rolq $2,%rcx
+ xorq 20(%rdi),%rbx
+ xchgq %rsi,%rdi
+ rolq $39,%rax
+ rolq $41,%rbx
+ movq %rdx,%r13
+ andq %rbp,%rdx
+ notq %rbp
+ xorq %rcx,%rdx
+ movq %rdx,92(%rdi)
+
+ movq %rax,%r14
+ andq %rbp,%rax
+ xorq %r13,%rax
+ movq %rax,60(%rdi)
+
+ orq %rcx,%r13
+ xorq %rbx,%r13
+ movq %r13,84(%rdi)
+
+ andq %rbx,%rcx
+ xorq %r14,%rcx
+ movq %rcx,76(%rdi)
+
+ orq %r14,%rbx
+ xorq %rbp,%rbx
+ movq %rbx,68(%rdi)
+
+ movq %rdx,%rbp
+ movq %r13,%rdx
+
+ testq $255,%r15
+ jnz L$oop
+
+ leaq -192(%r15),%r15
+ .byte 0xf3,0xc3
+
+
+
+.p2align 5
+KeccakF1600:
+
+ pushq %rbx
+
+ pushq %rbp
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+
+ leaq 100(%rdi),%rdi
+ subq $200,%rsp
+
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ leaq iotas(%rip),%r15
+ leaq 100(%rsp),%rsi
+
+ call __KeccakF1600
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq -100(%rdi),%rdi
+
+ addq $200,%rsp
+
+
+ popq %r15
+
+ popq %r14
+
+ popq %r13
+
+ popq %r12
+
+ popq %rbp
+
+ popq %rbx
+
+ .byte 0xf3,0xc3
+
+
+.globl _SHA3_absorb
+
+.p2align 5
+_SHA3_absorb:
+
+ pushq %rbx
+
+ pushq %rbp
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+
+ leaq 100(%rdi),%rdi
+ subq $232,%rsp
+
+
+ movq %rsi,%r9
+ leaq 100(%rsp),%rsi
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq iotas(%rip),%r15
+
+ movq %rcx,216-100(%rsi)
+
+L$oop_absorb:
+ cmpq %rcx,%rdx
+ jc L$done_absorb
+
+ shrq $3,%rcx
+ leaq -100(%rdi),%r8
+
+L$block_absorb:
+ movq (%r9),%rax
+ leaq 8(%r9),%r9
+ xorq (%r8),%rax
+ leaq 8(%r8),%r8
+ subq $8,%rdx
+ movq %rax,-8(%r8)
+ subq $1,%rcx
+ jnz L$block_absorb
+
+ movq %r9,200-100(%rsi)
+ movq %rdx,208-100(%rsi)
+ call __KeccakF1600
+ movq 200-100(%rsi),%r9
+ movq 208-100(%rsi),%rdx
+ movq 216-100(%rsi),%rcx
+ jmp L$oop_absorb
+
+.p2align 5
+L$done_absorb:
+ movq %rdx,%rax
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ addq $232,%rsp
+
+
+ popq %r15
+
+ popq %r14
+
+ popq %r13
+
+ popq %r12
+
+ popq %rbp
+
+ popq %rbx
+
+ .byte 0xf3,0xc3
+
+
+.globl _SHA3_squeeze
+
+.p2align 5
+_SHA3_squeeze:
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+
+ shrq $3,%rcx
+ movq %rdi,%r8
+ movq %rsi,%r12
+ movq %rdx,%r13
+ movq %rcx,%r14
+ jmp L$oop_squeeze
+
+.p2align 5
+L$oop_squeeze:
+ cmpq $8,%r13
+ jb L$tail_squeeze
+
+ movq (%r8),%rax
+ leaq 8(%r8),%r8
+ movq %rax,(%r12)
+ leaq 8(%r12),%r12
+ subq $8,%r13
+ jz L$done_squeeze
+
+ subq $1,%rcx
+ jnz L$oop_squeeze
+
+ call KeccakF1600
+ movq %rdi,%r8
+ movq %r14,%rcx
+ jmp L$oop_squeeze
+
+L$tail_squeeze:
+ movq %r8,%rsi
+ movq %r12,%rdi
+ movq %r13,%rcx
+.byte 0xf3,0xa4
+
+L$done_squeeze:
+ popq %r14
+
+ popq %r13
+
+ popq %r12
+
+ .byte 0xf3,0xc3
+
+
+.p2align 8
+.quad 0,0,0,0,0,0,0,0
+
+iotas:
+.quad 0x0000000000000001
+.quad 0x0000000000008082
+.quad 0x800000000000808a
+.quad 0x8000000080008000
+.quad 0x000000000000808b
+.quad 0x0000000080000001
+.quad 0x8000000080008081
+.quad 0x8000000000008009
+.quad 0x000000000000008a
+.quad 0x0000000000000088
+.quad 0x0000000080008009
+.quad 0x000000008000000a
+.quad 0x000000008000808b
+.quad 0x800000000000008b
+.quad 0x8000000000008089
+.quad 0x8000000000008003
+.quad 0x8000000000008002
+.quad 0x8000000000000080
+.quad 0x000000000000800a
+.quad 0x800000008000000a
+.quad 0x8000000080008081
+.quad 0x8000000000008080
+.quad 0x0000000080000001
+.quad 0x8000000080008008
+
+.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-mb-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-mb-x86_64.s
index 7026de0e76..b2009fb28f 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-mb-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-mb-x86_64.s
@@ -6,17 +6,22 @@
.p2align 5
_sha1_multi_block:
+
movq _OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
testl $268435456,%ecx
jnz _avx_shortcut
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+
L$body:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -2546,19 +2551,28 @@ L$oop:
L$done:
movq 272(%rsp),%rax
+
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 5
sha1_multi_block_shaext:
+
_shaext_shortcut:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
shll $1,%edx
andq $-256,%rsp
@@ -2914,14 +2928,19 @@ L$oop_shaext:
L$done_shaext:
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_shaext:
.byte 0xf3,0xc3
+
.p2align 5
sha1_multi_block_avx:
+
_avx_shortcut:
shrq $32,%rcx
cmpl $2,%edx
@@ -2932,11 +2951,15 @@ _avx_shortcut:
.p2align 5
L$avx:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+
L$body_avx:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -4986,27 +5009,41 @@ L$oop_avx:
L$done_avx:
movq 272(%rsp),%rax
+
vzeroupper
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 5
sha1_multi_block_avx2:
+
_avx2_shortcut:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $576,%rsp
andq $-256,%rsp
movq %rax,544(%rsp)
+
L$body_avx2:
leaq K_XX_XX(%rip),%rbp
shrl $1,%edx
@@ -7193,18 +7230,27 @@ L$oop_avx2:
L$done_avx2:
movq 544(%rsp),%rax
+
vzeroupper
movq -48(%rax),%r15
+
movq -40(%rax),%r14
+
movq -32(%rax),%r13
+
movq -24(%rax),%r12
+
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+
.p2align 8
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-x86_64.s
index 3e3633911f..02472d0b7d 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha1-x86_64.s
@@ -5,6 +5,7 @@
.p2align 4
_sha1_block_data_order:
+
movl _OPENSSL_ia32cap_P+0(%rip),%r9d
movl _OPENSSL_ia32cap_P+4(%rip),%r8d
movl _OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -25,17 +26,24 @@ _sha1_block_data_order:
.p2align 4
L$ialu:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
movq %rdi,%r8
subq $72,%rsp
movq %rsi,%r9
andq $-64,%rsp
movq %rdx,%r10
movq %rax,64(%rsp)
+
L$prologue:
movl 0(%r8),%esi
@@ -1230,19 +1238,28 @@ L$loop:
jnz L$loop
movq 64(%rsp),%rsi
+
movq -40(%rsi),%r14
+
movq -32(%rsi),%r13
+
movq -24(%rsi),%r12
+
movq -16(%rsi),%rbp
+
movq -8(%rsi),%rbx
+
leaq (%rsi),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 5
sha1_block_data_order_shaext:
_shaext_shortcut:
+
movdqu (%rdi),%xmm0
movd 16(%rdi),%xmm1
movdqa K_XX_XX+160(%rip),%xmm3
@@ -1404,20 +1421,27 @@ L$oop_shaext:
pshufd $27,%xmm1,%xmm1
movdqu %xmm0,(%rdi)
movd %xmm1,16(%rdi)
+
.byte 0xf3,0xc3
.p2align 4
sha1_block_data_order_ssse3:
_ssse3_shortcut:
- movq %rsp,%rax
+
+ movq %rsp,%r11
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
leaq -64(%rsp),%rsp
- movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@@ -1425,7 +1449,7 @@ _ssse3_shortcut:
shlq $6,%r10
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@@ -1437,8 +1461,8 @@ _ssse3_shortcut:
xorl %edx,%edi
andl %edi,%esi
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@@ -1514,7 +1538,7 @@ L$oop_ssse3:
pslld $2,%xmm9
pxor %xmm10,%xmm4
xorl %ebp,%edx
- movdqa -64(%r11),%xmm10
+ movdqa -64(%r14),%xmm10
roll $5,%ecx
addl %edi,%ebx
andl %edx,%esi
@@ -1575,7 +1599,7 @@ L$oop_ssse3:
pslld $2,%xmm10
pxor %xmm8,%xmm5
xorl %eax,%ebp
- movdqa -32(%r11),%xmm8
+ movdqa -32(%r14),%xmm8
roll $5,%edx
addl %edi,%ecx
andl %ebp,%esi
@@ -1636,7 +1660,7 @@ L$oop_ssse3:
pslld $2,%xmm8
pxor %xmm9,%xmm6
xorl %ebx,%eax
- movdqa -32(%r11),%xmm9
+ movdqa -32(%r14),%xmm9
roll $5,%ebp
addl %edi,%edx
andl %eax,%esi
@@ -1697,7 +1721,7 @@ L$oop_ssse3:
pslld $2,%xmm9
pxor %xmm10,%xmm7
xorl %ecx,%ebx
- movdqa -32(%r11),%xmm10
+ movdqa -32(%r14),%xmm10
roll $5,%eax
addl %edi,%ebp
andl %ebx,%esi
@@ -1808,7 +1832,7 @@ L$oop_ssse3:
pxor %xmm3,%xmm2
addl %esi,%eax
xorl %edx,%edi
- movdqa 0(%r11),%xmm10
+ movdqa 0(%r14),%xmm10
rorl $7,%ecx
paddd %xmm1,%xmm9
addl %ebx,%eax
@@ -2043,7 +2067,7 @@ L$oop_ssse3:
pxor %xmm0,%xmm7
roll $5,%ebx
addl %esi,%eax
- movdqa 32(%r11),%xmm9
+ movdqa 32(%r14),%xmm9
xorl %ecx,%edi
paddd %xmm6,%xmm8
xorl %edx,%ecx
@@ -2334,8 +2358,8 @@ L$oop_ssse3:
addl %edx,%ecx
cmpq %r10,%r9
je L$done_ssse3
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@@ -2572,29 +2596,41 @@ L$done_ssse3:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+
+ movq -32(%r11),%r13
+
+ movq -24(%r11),%r12
+
+ movq -16(%r11),%rbp
+
+ movq -8(%r11),%rbx
+
+ leaq (%r11),%rsp
+
L$epilogue_ssse3:
.byte 0xf3,0xc3
+
.p2align 4
sha1_block_data_order_avx:
_avx_shortcut:
- movq %rsp,%rax
+
+ movq %rsp,%r11
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
leaq -64(%rsp),%rsp
vzeroupper
- movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@@ -2602,7 +2638,7 @@ _avx_shortcut:
shlq $6,%r10
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@@ -2614,8 +2650,8 @@ _avx_shortcut:
xorl %edx,%edi
andl %edi,%esi
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@@ -2740,7 +2776,7 @@ L$oop_avx:
vpxor %xmm10,%xmm5,%xmm5
xorl %eax,%ebp
shldl $5,%edx,%edx
- vmovdqa -32(%r11),%xmm11
+ vmovdqa -32(%r14),%xmm11
addl %edi,%ecx
andl %ebp,%esi
xorl %eax,%ebp
@@ -2953,7 +2989,7 @@ L$oop_avx:
addl %esi,%eax
xorl %edx,%edi
vpaddd %xmm1,%xmm11,%xmm9
- vmovdqa 0(%r11),%xmm11
+ vmovdqa 0(%r14),%xmm11
shrdl $7,%ecx,%ecx
addl %ebx,%eax
vpxor %xmm8,%xmm2,%xmm2
@@ -3172,7 +3208,7 @@ L$oop_avx:
movl %ebx,%edi
xorl %edx,%esi
vpaddd %xmm6,%xmm11,%xmm9
- vmovdqa 32(%r11),%xmm11
+ vmovdqa 32(%r14),%xmm11
shldl $5,%ebx,%ebx
addl %esi,%eax
vpxor %xmm8,%xmm7,%xmm7
@@ -3451,8 +3487,8 @@ L$oop_avx:
addl %edx,%ecx
cmpq %r10,%r9
je L$done_avx
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@@ -3688,28 +3724,40 @@ L$done_avx:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+
+ movq -32(%r11),%r13
+
+ movq -24(%r11),%r12
+
+ movq -16(%r11),%rbp
+
+ movq -8(%r11),%rbx
+
+ leaq (%r11),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 4
sha1_block_data_order_avx2:
_avx2_shortcut:
- movq %rsp,%rax
+
+ movq %rsp,%r11
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
vzeroupper
- movq %rax,%r14
movq %rdi,%r8
movq %rsi,%r9
movq %rdx,%r10
@@ -3719,7 +3767,7 @@ _avx2_shortcut:
leaq 64(%r9),%r13
andq $-128,%rsp
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
cmpq %r10,%r13
@@ -3728,7 +3776,7 @@ _avx2_shortcut:
movl 8(%r8),%ecx
movl 12(%r8),%edx
movl 16(%r8),%esi
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
vmovdqu (%r9),%xmm0
vmovdqu 16(%r9),%xmm1
@@ -3742,7 +3790,7 @@ _avx2_shortcut:
vpshufb %ymm6,%ymm1,%ymm1
vinserti128 $1,48(%r13),%ymm3,%ymm3
vpshufb %ymm6,%ymm2,%ymm2
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm3,%ymm3
vpaddd %ymm11,%ymm0,%ymm4
@@ -3774,7 +3822,7 @@ _avx2_shortcut:
vpxor %ymm3,%ymm8,%ymm8
vpxor %ymm8,%ymm5,%ymm5
vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
vpslldq $12,%ymm5,%ymm10
vpaddd %ymm5,%ymm5,%ymm5
vpsrld $30,%ymm10,%ymm9
@@ -3928,7 +3976,7 @@ L$align32_1:
addl -56(%r13),%ebp
andnl %esi,%ebx,%edi
vpxor %ymm3,%ymm2,%ymm2
- vmovdqu 0(%r11),%ymm11
+ vmovdqu 0(%r14),%ymm11
addl %ecx,%ebp
rorxl $27,%ebx,%r12d
rorxl $2,%ebx,%ecx
@@ -4159,7 +4207,7 @@ L$align32_1:
addl -116(%r13),%eax
leal (%rax,%rbx,1),%eax
vpxor %ymm0,%ymm7,%ymm7
- vmovdqu 32(%r11),%ymm11
+ vmovdqu 32(%r14),%ymm11
rorxl $27,%ebp,%r12d
rorxl $2,%ebp,%ebx
xorl %ecx,%ebp
@@ -4604,7 +4652,7 @@ L$align32_2:
cmpq %r10,%r9
je L$done_avx2
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
cmpq %r10,%rdi
ja L$ast_avx2
@@ -4820,7 +4868,7 @@ L$ast_avx2:
xorl %ebx,%eax
addl %r12d,%esi
xorl %ecx,%eax
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm0,%ymm0
addl 68(%r13),%edx
leal (%rdx,%rax,1),%edx
@@ -5176,7 +5224,7 @@ L$align32_3:
xorl %ebp,%esi
addl %r12d,%edx
vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
xorl %ebx,%esi
addl 104(%r13),%ecx
leal (%rcx,%rsi,1),%ecx
@@ -5369,16 +5417,22 @@ L$align32_3:
L$done_avx2:
vzeroupper
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+
+ movq -32(%r11),%r13
+
+ movq -24(%r11),%r12
+
+ movq -16(%r11),%rbp
+
+ movq -8(%r11),%rbx
+
+ leaq (%r11),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+
.p2align 6
K_XX_XX:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-mb-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-mb-x86_64.s
index 95e0e774af..bab9a565a2 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-mb-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-mb-x86_64.s
@@ -6,17 +6,22 @@
.p2align 5
_sha256_multi_block:
+
movq _OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
testl $268435456,%ecx
jnz _avx_shortcut
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+
L$body:
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -2615,19 +2620,28 @@ L$oop_16_xx:
L$done:
movq 272(%rsp),%rax
+
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 5
sha256_multi_block_shaext:
+
_shaext_shortcut:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
shll $1,%edx
andq $-256,%rsp
@@ -3102,14 +3116,19 @@ L$oop_shaext:
L$done_shaext:
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_shaext:
.byte 0xf3,0xc3
+
.p2align 5
sha256_multi_block_avx:
+
_avx_shortcut:
shrq $32,%rcx
cmpl $2,%edx
@@ -3120,11 +3139,15 @@ _avx_shortcut:
.p2align 5
L$avx:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+
L$body_avx:
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -5353,27 +5376,41 @@ L$oop_16_xx_avx:
L$done_avx:
movq 272(%rsp),%rax
+
vzeroupper
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 5
sha256_multi_block_avx2:
+
_avx2_shortcut:
movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $576,%rsp
andq $-256,%rsp
movq %rax,544(%rsp)
+
L$body_avx2:
leaq K256+128(%rip),%rbp
leaq 128(%rdi),%rdi
@@ -7738,17 +7775,26 @@ L$oop_16_xx_avx2:
L$done_avx2:
movq 544(%rsp),%rax
+
vzeroupper
movq -48(%rax),%r15
+
movq -40(%rax),%r14
+
movq -32(%rax),%r13
+
movq -24(%rax),%r12
+
movq -16(%rax),%rbp
+
movq -8(%rax),%rbx
+
leaq (%rax),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+
.p2align 8
K256:
.long 1116352408,1116352408,1116352408,1116352408
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
index 05e973612b..e43cdd7040 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s
@@ -5,6 +5,7 @@
.p2align 4
_sha256_block_data_order:
+
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@@ -21,13 +22,20 @@ _sha256_block_data_order:
je L$avx_shortcut
testl $512,%r10d
jnz L$ssse3_shortcut
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -35,7 +43,8 @@ _sha256_block_data_order:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue:
movl 0(%rdi),%eax
@@ -1699,17 +1708,26 @@ L$rounds_16_xx:
movl %r11d,28(%rdi)
jb L$loop
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 6
K256:
@@ -1963,14 +1981,22 @@ L$oop_shaext:
.p2align 6
sha256_block_data_order_ssse3:
+
L$ssse3_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -1978,7 +2004,8 @@ L$ssse3_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_ssse3:
movl 0(%rdi),%eax
@@ -3044,28 +3071,45 @@ L$ssse3_00_47:
movl %r11d,28(%rdi)
jb L$loop_ssse3
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_ssse3:
.byte 0xf3,0xc3
+
.p2align 6
sha256_block_data_order_avx:
+
L$avx_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -3073,7 +3117,8 @@ L$avx_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_avx:
vzeroupper
@@ -4100,29 +4145,46 @@ L$avx_00_47:
movl %r11d,28(%rdi)
jb L$loop_avx
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 6
sha256_block_data_order_avx2:
+
L$avx2_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
subq $544,%rsp
shlq $4,%rdx
andq $-1024,%rsp
@@ -4131,7 +4193,8 @@ L$avx2_shortcut:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+
L$prologue_avx2:
vzeroupper
@@ -5344,15 +5407,24 @@ L$ower_avx2:
L$done_avx2:
leaq (%rbp),%rsp
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha512-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha512-x86_64.s
index 234616bc3b..51ace9a686 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha512-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha512-x86_64.s
@@ -5,6 +5,7 @@
.p2align 4
_sha512_block_data_order:
+
leaq _OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@@ -19,13 +20,20 @@ _sha512_block_data_order:
orl %r9d,%r10d
cmpl $1342177792,%r10d
je L$avx_shortcut
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $128+32,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -33,7 +41,8 @@ _sha512_block_data_order:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+
L$prologue:
movq 0(%rdi),%rax
@@ -1697,17 +1706,26 @@ L$rounds_16_xx:
movq %r11,56(%rdi)
jb L$loop
- movq 128+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 152(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue:
.byte 0xf3,0xc3
+
.p2align 6
K512:
@@ -1798,14 +1816,22 @@ K512:
.p2align 6
sha512_block_data_order_xop:
+
L$xop_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -1813,7 +1839,8 @@ L$xop_shortcut:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+
L$prologue_xop:
vzeroupper
@@ -2866,29 +2893,46 @@ L$xop_00_47:
movq %r11,56(%rdi)
jb L$loop_xop
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_xop:
.byte 0xf3,0xc3
+
.p2align 6
sha512_block_data_order_avx:
+
L$avx_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -2896,7 +2940,8 @@ L$avx_shortcut:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+
L$prologue_avx:
vzeroupper
@@ -4013,29 +4058,46 @@ L$avx_00_47:
movq %r11,56(%rdi)
jb L$loop_avx
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx:
.byte 0xf3,0xc3
+
.p2align 6
sha512_block_data_order_avx2:
+
L$avx2_shortcut:
+ movq %rsp,%rax
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
- movq %rsp,%r11
+
subq $1312,%rsp
shlq $4,%rdx
andq $-2048,%rsp
@@ -4044,7 +4106,8 @@ L$avx2_shortcut:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+
L$prologue_avx2:
vzeroupper
@@ -5351,15 +5414,24 @@ L$ower_avx2:
L$done_avx2:
leaq (%rbp),%rsp
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$epilogue_avx2:
.byte 0xf3,0xc3
+