diff options
Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s')
-rw-r--r-- | deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s | 152 |
1 files changed, 112 insertions, 40 deletions
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s index 05e973612b..e43cdd7040 100644 --- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/sha/sha256-x86_64.s @@ -5,6 +5,7 @@ .p2align 4 _sha256_block_data_order: + leaq _OPENSSL_ia32cap_P(%rip),%r11 movl 0(%r11),%r9d movl 4(%r11),%r10d @@ -21,13 +22,20 @@ _sha256_block_data_order: je L$avx_shortcut testl $512,%r10d jnz L$ssse3_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + shlq $4,%rdx subq $64+32,%rsp leaq (%rsi,%rdx,4),%rdx @@ -35,7 +43,8 @@ _sha256_block_data_order: movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) + movq %rax,88(%rsp) + L$prologue: movl 0(%rdi),%eax @@ -1699,17 +1708,26 @@ L$rounds_16_xx: movl %r11d,28(%rdi) jb L$loop - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq 88(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$epilogue: .byte 0xf3,0xc3 + .p2align 6 K256: @@ -1963,14 +1981,22 @@ L$oop_shaext: .p2align 6 sha256_block_data_order_ssse3: + L$ssse3_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + shlq $4,%rdx subq $96,%rsp leaq (%rsi,%rdx,4),%rdx @@ -1978,7 +2004,8 @@ L$ssse3_shortcut: movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) + movq %rax,88(%rsp) + L$prologue_ssse3: movl 0(%rdi),%eax @@ -3044,28 +3071,45 @@ L$ssse3_00_47: movl %r11d,28(%rdi) jb L$loop_ssse3 - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq 88(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$epilogue_ssse3: .byte 0xf3,0xc3 + .p2align 6 sha256_block_data_order_avx: + L$avx_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + shlq $4,%rdx subq $96,%rsp leaq (%rsi,%rdx,4),%rdx @@ -3073,7 +3117,8 @@ L$avx_shortcut: movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) + movq %rax,88(%rsp) + L$prologue_avx: vzeroupper @@ -4100,29 +4145,46 @@ L$avx_00_47: movl %r11d,28(%rdi) jb L$loop_avx - movq 64+24(%rsp),%rsi + movq 88(%rsp),%rsi + vzeroupper - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$epilogue_avx: .byte 0xf3,0xc3 + .p2align 6 sha256_block_data_order_avx2: + L$avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + subq $544,%rsp shlq $4,%rdx andq $-1024,%rsp @@ -4131,7 +4193,8 @@ L$avx2_shortcut: movq %rdi,64+0(%rsp) movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) + movq %rax,88(%rsp) + L$prologue_avx2: vzeroupper @@ -5344,15 +5407,24 @@ L$ower_avx2: L$done_avx2: leaq (%rbp),%rsp - movq 64+24(%rsp),%rsi + movq 88(%rsp),%rsi + vzeroupper - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$epilogue_avx2: .byte 0xf3,0xc3 + |