Diffstat (limited to 'deps/openssl/config/archs/linux-x86_64/asm/crypto/sha')
6 files changed, 947 insertions(+), 135 deletions(-)
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/keccak1600-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/keccak1600-x86_64.s
new file mode 100644
index 0000000000..e511f25035
--- /dev/null
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/keccak1600-x86_64.s
@@ -0,0 +1,522 @@
+.text
+
+.type __KeccakF1600,@function
+.align 32
+__KeccakF1600:
+ movq 60(%rdi),%rax
+ movq 68(%rdi),%rbx
+ movq 76(%rdi),%rcx
+ movq 84(%rdi),%rdx
+ movq 92(%rdi),%rbp
+ jmp .Loop
+
+.align 32
+.Loop:
+ movq -100(%rdi),%r8
+ movq -52(%rdi),%r9
+ movq -4(%rdi),%r10
+ movq 44(%rdi),%r11
+
+ xorq -84(%rdi),%rcx
+ xorq -76(%rdi),%rdx
+ xorq %r8,%rax
+ xorq -92(%rdi),%rbx
+ xorq -44(%rdi),%rcx
+ xorq -60(%rdi),%rax
+ movq %rbp,%r12
+ xorq -68(%rdi),%rbp
+
+ xorq %r10,%rcx
+ xorq -20(%rdi),%rax
+ xorq -36(%rdi),%rdx
+ xorq %r9,%rbx
+ xorq -28(%rdi),%rbp
+
+ xorq 36(%rdi),%rcx
+ xorq 20(%rdi),%rax
+ xorq 4(%rdi),%rdx
+ xorq -12(%rdi),%rbx
+ xorq 12(%rdi),%rbp
+
+ movq %rcx,%r13
+ rolq $1,%rcx
+ xorq %rax,%rcx
+ xorq %r11,%rdx
+
+ rolq $1,%rax
+ xorq %rdx,%rax
+ xorq 28(%rdi),%rbx
+
+ rolq $1,%rdx
+ xorq %rbx,%rdx
+ xorq 52(%rdi),%rbp
+
+ rolq $1,%rbx
+ xorq %rbp,%rbx
+
+ rolq $1,%rbp
+ xorq %r13,%rbp
+ xorq %rcx,%r9
+ xorq %rdx,%r10
+ rolq $44,%r9
+ xorq %rbp,%r11
+ xorq %rax,%r12
+ rolq $43,%r10
+ xorq %rbx,%r8
+ movq %r9,%r13
+ rolq $21,%r11
+ orq %r10,%r9
+ xorq %r8,%r9
+ rolq $14,%r12
+
+ xorq (%r15),%r9
+ leaq 8(%r15),%r15
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq %r9,-100(%rsi)
+ xorq %r10,%r12
+ notq %r10
+ movq %r12,-84(%rsi)
+
+ orq %r11,%r10
+ movq 76(%rdi),%r12
+ xorq %r13,%r10
+ movq %r10,-92(%rsi)
+
+ andq %r8,%r13
+ movq -28(%rdi),%r9
+ xorq %r14,%r13
+ movq -20(%rdi),%r10
+ movq %r13,-68(%rsi)
+
+ orq %r8,%r14
+ movq -76(%rdi),%r8
+ xorq %r11,%r14
+ movq 28(%rdi),%r11
+ movq %r14,-76(%rsi)
+
+
+ xorq %rbp,%r8
+ xorq %rdx,%r12
+ rolq $28,%r8
+ xorq %rcx,%r11
+ xorq %rax,%r9
+ rolq $61,%r12
+ rolq $45,%r11
+ xorq %rbx,%r10
+ rolq $20,%r9
+ movq %r8,%r13
+ orq %r12,%r8
+ rolq $3,%r10
+
+ xorq %r11,%r8
+ movq %r8,-36(%rsi)
+
+ movq %r9,%r14
+ andq %r13,%r9
+ movq -92(%rdi),%r8
+ xorq %r12,%r9
+ notq %r12
+ movq %r9,-28(%rsi)
+
+ orq %r11,%r12
+ movq -44(%rdi),%r9
+ xorq %r10,%r12
+ movq %r12,-44(%rsi)
+
+ andq %r10,%r11
+ movq 60(%rdi),%r12
+ xorq %r14,%r11
+ movq %r11,-52(%rsi)
+
+ orq %r10,%r14
+ movq 4(%rdi),%r10
+ xorq %r13,%r14
+ movq 52(%rdi),%r11
+ movq %r14,-60(%rsi)
+
+
+ xorq %rbp,%r10
+ xorq %rax,%r11
+ rolq $25,%r10
+ xorq %rdx,%r9
+ rolq $8,%r11
+ xorq %rbx,%r12
+ rolq $6,%r9
+ xorq %rcx,%r8
+ rolq $18,%r12
+ movq %r10,%r13
+ andq %r11,%r10
+ rolq $1,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,-12(%rsi)
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq -12(%rdi),%r10
+ xorq %r13,%r12
+ movq %r12,-4(%rsi)
+
+ orq %r9,%r13
+ movq 84(%rdi),%r12
+ xorq %r8,%r13
+ movq %r13,-20(%rsi)
+
+ andq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,12(%rsi)
+
+ orq %r8,%r14
+ movq -60(%rdi),%r9
+ xorq %r11,%r14
+ movq 36(%rdi),%r11
+ movq %r14,4(%rsi)
+
+
+ movq -68(%rdi),%r8
+
+ xorq %rcx,%r10
+ xorq %rdx,%r11
+ rolq $10,%r10
+ xorq %rbx,%r9
+ rolq $15,%r11
+ xorq %rbp,%r12
+ rolq $36,%r9
+ xorq %rax,%r8
+ rolq $56,%r12
+ movq %r10,%r13
+ orq %r11,%r10
+ rolq $27,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,28(%rsi)
+
+ movq %r12,%r14
+ orq %r11,%r12
+ xorq %r13,%r12
+ movq %r12,36(%rsi)
+
+ andq %r9,%r13
+ xorq %r8,%r13
+ movq %r13,20(%rsi)
+
+ orq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,52(%rsi)
+
+ andq %r14,%r8
+ xorq %r11,%r8
+ movq %r8,44(%rsi)
+
+
+ xorq -84(%rdi),%rdx
+ xorq -36(%rdi),%rbp
+ rolq $62,%rdx
+ xorq 68(%rdi),%rcx
+ rolq $55,%rbp
+ xorq 12(%rdi),%rax
+ rolq $2,%rcx
+ xorq 20(%rdi),%rbx
+ xchgq %rsi,%rdi
+ rolq $39,%rax
+ rolq $41,%rbx
+ movq %rdx,%r13
+ andq %rbp,%rdx
+ notq %rbp
+ xorq %rcx,%rdx
+ movq %rdx,92(%rdi)
+
+ movq %rax,%r14
+ andq %rbp,%rax
+ xorq %r13,%rax
+ movq %rax,60(%rdi)
+
+ orq %rcx,%r13
+ xorq %rbx,%r13
+ movq %r13,84(%rdi)
+
+ andq %rbx,%rcx
+ xorq %r14,%rcx
+ movq %rcx,76(%rdi)
+
+ orq %r14,%rbx
+ xorq %rbp,%rbx
+ movq %rbx,68(%rdi)
+
+ movq %rdx,%rbp
+ movq %r13,%rdx
+
+ testq $255,%r15
+ jnz .Loop
+
+ leaq -192(%r15),%r15
+ .byte 0xf3,0xc3
+.size __KeccakF1600,.-__KeccakF1600
+
+.type KeccakF1600,@function
+.align 32
+KeccakF1600:
+.cfi_startproc
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ leaq 100(%rdi),%rdi
+ subq $200,%rsp
+.cfi_adjust_cfa_offset 200
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ leaq iotas(%rip),%r15
+ leaq 100(%rsp),%rsi
+
+ call __KeccakF1600
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq -100(%rdi),%rdi
+
+ addq $200,%rsp
+.cfi_adjust_cfa_offset -200
+
+ popq %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ popq %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
+ popq %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size KeccakF1600,.-KeccakF1600
+.globl SHA3_absorb
+.type SHA3_absorb,@function
+.align 32
+SHA3_absorb:
+.cfi_startproc
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ leaq 100(%rdi),%rdi
+ subq $232,%rsp
+.cfi_adjust_cfa_offset 232
+
+ movq %rsi,%r9
+ leaq 100(%rsp),%rsi
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq iotas(%rip),%r15
+
+ movq %rcx,216-100(%rsi)
+
+.Loop_absorb:
+ cmpq %rcx,%rdx
+ jc .Ldone_absorb
+
+ shrq $3,%rcx
+ leaq -100(%rdi),%r8
+
+.Lblock_absorb:
+ movq (%r9),%rax
+ leaq 8(%r9),%r9
+ xorq (%r8),%rax
+ leaq 8(%r8),%r8
+ subq $8,%rdx
+ movq %rax,-8(%r8)
+ subq $1,%rcx
+ jnz .Lblock_absorb
+
+ movq %r9,200-100(%rsi)
+ movq %rdx,208-100(%rsi)
+ call __KeccakF1600
+ movq 200-100(%rsi),%r9
+ movq 208-100(%rsi),%rdx
+ movq 216-100(%rsi),%rcx
+ jmp .Loop_absorb
+
+.align 32
+.Ldone_absorb:
+ movq %rdx,%rax
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ addq $232,%rsp
+.cfi_adjust_cfa_offset -232
+
+ popq %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ popq %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
+ popq %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size SHA3_absorb,.-SHA3_absorb
+.globl SHA3_squeeze
+.type SHA3_squeeze,@function
+.align 32
+SHA3_squeeze:
+.cfi_startproc
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-32
+
+ shrq $3,%rcx
+ movq %rdi,%r8
+ movq %rsi,%r12
+ movq %rdx,%r13
+ movq %rcx,%r14
+ jmp .Loop_squeeze
+
+.align 32
+.Loop_squeeze:
+ cmpq $8,%r13
+ jb .Ltail_squeeze
+
+ movq (%r8),%rax
+ leaq 8(%r8),%r8
+ movq %rax,(%r12)
+ leaq 8(%r12),%r12
+ subq $8,%r13
+ jz .Ldone_squeeze
+
+ subq $1,%rcx
+ jnz .Loop_squeeze
+
+ call KeccakF1600
+ movq %rdi,%r8
+ movq %r14,%rcx
+ jmp .Loop_squeeze
+
+.Ltail_squeeze:
+ movq %r8,%rsi
+ movq %r12,%rdi
+ movq %r13,%rcx
+.byte 0xf3,0xa4
+
+.Ldone_squeeze:
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size SHA3_squeeze,.-SHA3_squeeze
+.align 256
+.quad 0,0,0,0,0,0,0,0
+.type iotas,@object
+iotas:
+.quad 0x0000000000000001
+.quad 0x0000000000008082
+.quad 0x800000000000808a
+.quad 0x8000000080008000
+.quad 0x000000000000808b
+.quad 0x0000000080000001
+.quad 0x8000000080008081
+.quad 0x8000000000008009
+.quad 0x000000000000008a
+.quad 0x0000000000000088
+.quad 0x0000000080008009
+.quad 0x000000008000000a
+.quad 0x000000008000808b
+.quad 0x800000000000008b
+.quad 0x8000000000008089
+.quad 0x8000000000008003
+.quad 0x8000000000008002
+.quad 0x8000000000000080
+.quad 0x000000000000800a
+.quad 0x800000008000000a
+.quad 0x8000000080008081
+.quad 0x8000000000008080
+.quad 0x0000000080000001
+.quad 0x8000000080008008
+.size iotas,.-iotas
+.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-mb-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
index d266d776ec..1a0de0f100 100644
--- a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
@@ -6,17 +6,22 @@
 .type sha1_multi_block,@function
 .align 32
 sha1_multi_block:
+.cfi_startproc
 movq OPENSSL_ia32cap_P+4(%rip),%rcx
 btq $61,%rcx
 jc _shaext_shortcut
 testl $268435456,%ecx
 jnz _avx_shortcut
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 andq $-256,%rsp
 movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
 .Lbody:
 leaq K_XX_XX(%rip),%rbp
 leaq 256(%rsp),%rbx
@@ -2546,19 +2551,28 @@ sha1_multi_block:
 .Ldone:
 movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_multi_block,.-sha1_multi_block
 .type sha1_multi_block_shaext,@function
 .align 32
 sha1_multi_block_shaext:
+.cfi_startproc
 _shaext_shortcut:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 shll $1,%edx
 andq $-256,%rsp
@@ -2914,14 +2928,19 @@ _shaext_shortcut:
 .Ldone_shaext:
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_shaext:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_multi_block_shaext,.-sha1_multi_block_shaext
 .type sha1_multi_block_avx,@function
 .align 32
 sha1_multi_block_avx:
+.cfi_startproc
 _avx_shortcut:
 shrq $32,%rcx
 cmpl $2,%edx
@@ -2932,11 +2951,15 @@ _avx_shortcut:
 .align 32
 .Lavx:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 andq $-256,%rsp
 movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
 .Lbody_avx:
 leaq K_XX_XX(%rip),%rbp
 leaq 256(%rsp),%rbx
@@ -4986,27 +5009,41 @@ _avx_shortcut:
 .Ldone_avx:
 movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
 vzeroupper
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_multi_block_avx,.-sha1_multi_block_avx
 .type sha1_multi_block_avx2,@function
 .align 32
 sha1_multi_block_avx2:
+.cfi_startproc
 _avx2_shortcut:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
+.cfi_offset %r15,-56
 subq $576,%rsp
 andq $-256,%rsp
 movq %rax,544(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
 .Lbody_avx2:
 leaq K_XX_XX(%rip),%rbp
 shrl $1,%edx
@@ -7193,16 +7230,25 @@ _avx2_shortcut:
 .Ldone_avx2:
 movq 544(%rsp),%rax
+.cfi_def_cfa %rax,8
 vzeroupper
 movq -48(%rax),%r15
+.cfi_restore %r15
 movq -40(%rax),%r14
+.cfi_restore %r14
 movq -32(%rax),%r13
+.cfi_restore %r13
 movq -24(%rax),%r12
+.cfi_restore %r12
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx2:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_multi_block_avx2,.-sha1_multi_block_avx2
 .align 256
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-x86_64.s
index dbeebed9a0..e436521a04 100644
--- a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-x86_64.s
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha1-x86_64.s
@@ -5,6 +5,7 @@
 .type sha1_block_data_order,@function
 .align 16
 sha1_block_data_order:
+.cfi_startproc
 movl OPENSSL_ia32cap_P+0(%rip),%r9d
 movl OPENSSL_ia32cap_P+4(%rip),%r8d
 movl OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -25,17 +26,24 @@ sha1_block_data_order:
 .align 16
 .Lialu:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 movq %rdi,%r8
 subq $72,%rsp
 movq %rsi,%r9
 andq $-64,%rsp
 movq %rdx,%r10
 movq %rax,64(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
 .Lprologue:
 movl 0(%r8),%esi
@@ -1230,19 +1238,28 @@ sha1_block_data_order:
 jnz .Lloop
 movq 64(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 movq -40(%rsi),%r14
+.cfi_restore %r14
 movq -32(%rsi),%r13
+.cfi_restore %r13
 movq -24(%rsi),%r12
+.cfi_restore %r12
 movq -16(%rsi),%rbp
+.cfi_restore %rbp
 movq -8(%rsi),%rbx
+.cfi_restore %rbx
 leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_block_data_order,.-sha1_block_data_order
 .type sha1_block_data_order_shaext,@function
 .align 32
 sha1_block_data_order_shaext:
 _shaext_shortcut:
+.cfi_startproc
 movdqu (%rdi),%xmm0
 movd 16(%rdi),%xmm1
 movdqa K_XX_XX+160(%rip),%xmm3
@@ -1404,20 +1421,27 @@ _shaext_shortcut:
 pshufd $27,%xmm1,%xmm1
 movdqu %xmm0,(%rdi)
 movd %xmm1,16(%rdi)
+.cfi_endproc
 .byte 0xf3,0xc3
 .size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
 .type sha1_block_data_order_ssse3,@function
 .align 16
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 leaq -64(%rsp),%rsp
- movq %rax,%r14
 andq $-64,%rsp
 movq %rdi,%r8
 movq %rsi,%r9
@@ -1425,7 +1449,7 @@ _ssse3_shortcut:
 shlq $6,%r10
 addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
 movl 0(%r8),%eax
 movl 4(%r8),%ebx
@@ -1437,8 +1461,8 @@ _ssse3_shortcut:
 xorl %edx,%edi
 andl %edi,%esi
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
 movdqu 0(%r9),%xmm0
 movdqu 16(%r9),%xmm1
 movdqu 32(%r9),%xmm2
@@ -1514,7 +1538,7 @@ _ssse3_shortcut:
 pslld $2,%xmm9
 pxor %xmm10,%xmm4
 xorl %ebp,%edx
- movdqa -64(%r11),%xmm10
+ movdqa -64(%r14),%xmm10
 roll $5,%ecx
 addl %edi,%ebx
 andl %edx,%esi
@@ -1575,7 +1599,7 @@ _ssse3_shortcut:
 pslld $2,%xmm10
 pxor %xmm8,%xmm5
 xorl %eax,%ebp
- movdqa -32(%r11),%xmm8
+ movdqa -32(%r14),%xmm8
 roll $5,%edx
 addl %edi,%ecx
 andl %ebp,%esi
@@ -1636,7 +1660,7 @@ _ssse3_shortcut:
 pslld $2,%xmm8
 pxor %xmm9,%xmm6
 xorl %ebx,%eax
- movdqa -32(%r11),%xmm9
+ movdqa -32(%r14),%xmm9
 roll $5,%ebp
 addl %edi,%edx
 andl %eax,%esi
@@ -1697,7 +1721,7 @@ _ssse3_shortcut:
 pslld $2,%xmm9
 pxor %xmm10,%xmm7
 xorl %ecx,%ebx
- movdqa -32(%r11),%xmm10
+ movdqa -32(%r14),%xmm10
 roll $5,%eax
 addl %edi,%ebp
 andl %ebx,%esi
@@ -1808,7 +1832,7 @@ _ssse3_shortcut:
 pxor %xmm3,%xmm2
 addl %esi,%eax
 xorl %edx,%edi
- movdqa 0(%r11),%xmm10
+ movdqa 0(%r14),%xmm10
 rorl $7,%ecx
 paddd %xmm1,%xmm9
 addl %ebx,%eax
@@ -2043,7 +2067,7 @@ _ssse3_shortcut:
 pxor %xmm0,%xmm7
 roll $5,%ebx
 addl %esi,%eax
- movdqa 32(%r11),%xmm9
+ movdqa 32(%r14),%xmm9
 xorl %ecx,%edi
 paddd %xmm6,%xmm8
 xorl %edx,%ecx
@@ -2334,8 +2358,8 @@ _ssse3_shortcut:
 addl %edx,%ecx
 cmpq %r10,%r9
 je .Ldone_ssse3
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
 movdqu 0(%r9),%xmm0
 movdqu 16(%r9),%xmm1
 movdqu 32(%r9),%xmm2
@@ -2572,29 +2596,41 @@ _ssse3_shortcut:
 movl %ecx,8(%r8)
 movl %edx,12(%r8)
 movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_ssse3:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
 .type sha1_block_data_order_avx,@function
 .align 16
 sha1_block_data_order_avx:
 _avx_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 leaq -64(%rsp),%rsp
 vzeroupper
- movq %rax,%r14
 andq $-64,%rsp
 movq %rdi,%r8
 movq %rsi,%r9
@@ -2602,7 +2638,7 @@ _avx_shortcut:
 shlq $6,%r10
 addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
 movl 0(%r8),%eax
 movl 4(%r8),%ebx
@@ -2614,8 +2650,8 @@ _avx_shortcut:
 xorl %edx,%edi
 andl %edi,%esi
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
 vmovdqu 0(%r9),%xmm0
 vmovdqu 16(%r9),%xmm1
 vmovdqu 32(%r9),%xmm2
@@ -2740,7 +2776,7 @@ _avx_shortcut:
 vpxor %xmm10,%xmm5,%xmm5
 xorl %eax,%ebp
 shldl $5,%edx,%edx
- vmovdqa -32(%r11),%xmm11
+ vmovdqa -32(%r14),%xmm11
 addl %edi,%ecx
 andl %ebp,%esi
 xorl %eax,%ebp
@@ -2953,7 +2989,7 @@ _avx_shortcut:
 addl %esi,%eax
 xorl %edx,%edi
 vpaddd %xmm1,%xmm11,%xmm9
- vmovdqa 0(%r11),%xmm11
+ vmovdqa 0(%r14),%xmm11
 shrdl $7,%ecx,%ecx
 addl %ebx,%eax
 vpxor %xmm8,%xmm2,%xmm2
@@ -3172,7 +3208,7 @@ _avx_shortcut:
 movl %ebx,%edi
 xorl %edx,%esi
 vpaddd %xmm6,%xmm11,%xmm9
- vmovdqa 32(%r11),%xmm11
+ vmovdqa 32(%r14),%xmm11
 shldl $5,%ebx,%ebx
 addl %esi,%eax
 vpxor %xmm8,%xmm7,%xmm7
@@ -3451,8 +3487,8 @@ _avx_shortcut:
 addl %edx,%ecx
 cmpq %r10,%r9
 je .Ldone_avx
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
 vmovdqu 0(%r9),%xmm0
 vmovdqu 16(%r9),%xmm1
 vmovdqu 32(%r9),%xmm2
@@ -3688,28 +3724,40 @@ _avx_shortcut:
 movl %ecx,8(%r8)
 movl %edx,12(%r8)
 movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_block_data_order_avx,.-sha1_block_data_order_avx
 .type sha1_block_data_order_avx2,@function
 .align 16
 sha1_block_data_order_avx2:
 _avx2_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 vzeroupper
- movq %rax,%r14
 movq %rdi,%r8
 movq %rsi,%r9
 movq %rdx,%r10
@@ -3719,7 +3767,7 @@ _avx2_shortcut:
 leaq 64(%r9),%r13
 andq $-128,%rsp
 addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
 movl 0(%r8),%eax
 cmpq %r10,%r13
@@ -3728,7 +3776,7 @@ _avx2_shortcut:
 movl 8(%r8),%ecx
 movl 12(%r8),%edx
 movl 16(%r8),%esi
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
 vmovdqu (%r9),%xmm0
 vmovdqu 16(%r9),%xmm1
@@ -3742,7 +3790,7 @@ _avx2_shortcut:
 vpshufb %ymm6,%ymm1,%ymm1
 vinserti128 $1,48(%r13),%ymm3,%ymm3
 vpshufb %ymm6,%ymm2,%ymm2
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
 vpshufb %ymm6,%ymm3,%ymm3
 vpaddd %ymm11,%ymm0,%ymm4
@@ -3774,7 +3822,7 @@ _avx2_shortcut:
 vpxor %ymm3,%ymm8,%ymm8
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
 vpslldq $12,%ymm5,%ymm10
 vpaddd %ymm5,%ymm5,%ymm5
 vpsrld $30,%ymm10,%ymm9
@@ -3928,7 +3976,7 @@ _avx2_shortcut:
 addl -56(%r13),%ebp
 andnl %esi,%ebx,%edi
 vpxor %ymm3,%ymm2,%ymm2
- vmovdqu 0(%r11),%ymm11
+ vmovdqu 0(%r14),%ymm11
 addl %ecx,%ebp
 rorxl $27,%ebx,%r12d
 rorxl $2,%ebx,%ecx
@@ -4159,7 +4207,7 @@ _avx2_shortcut:
 addl -116(%r13),%eax
 leal (%rax,%rbx,1),%eax
 vpxor %ymm0,%ymm7,%ymm7
- vmovdqu 32(%r11),%ymm11
+ vmovdqu 32(%r14),%ymm11
 rorxl $27,%ebp,%r12d
 rorxl $2,%ebp,%ebx
 xorl %ecx,%ebp
@@ -4604,7 +4652,7 @@ _avx2_shortcut:
 cmpq %r10,%r9
 je .Ldone_avx2
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
 cmpq %r10,%rdi
 ja .Last_avx2
@@ -4820,7 +4868,7 @@ _avx2_shortcut:
 xorl %ebx,%eax
 addl %r12d,%esi
 xorl %ecx,%eax
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
 vpshufb %ymm6,%ymm0,%ymm0
 addl 68(%r13),%edx
 leal (%rdx,%rax,1),%edx
@@ -5176,7 +5224,7 @@ _avx2_shortcut:
 xorl %ebp,%esi
 addl %r12d,%edx
 vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
 xorl %ebx,%esi
 addl 104(%r13),%ecx
 leal (%rcx,%rsi,1),%ecx
@@ -5369,15 +5417,21 @@ _avx2_shortcut:
 .Ldone_avx2:
 vzeroupper
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx2:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
 .align 64
 K_XX_XX:
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-mb-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
index f2896b4d6e..59cf9c984e 100644
--- a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
@@ -6,17 +6,22 @@
 .type sha256_multi_block,@function
 .align 32
 sha256_multi_block:
+.cfi_startproc
 movq OPENSSL_ia32cap_P+4(%rip),%rcx
 btq $61,%rcx
 jc _shaext_shortcut
 testl $268435456,%ecx
 jnz _avx_shortcut
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 andq $-256,%rsp
 movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
 .Lbody:
 leaq K256+128(%rip),%rbp
 leaq 256(%rsp),%rbx
@@ -2615,19 +2620,28 @@ sha256_multi_block:
 .Ldone:
 movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_multi_block,.-sha256_multi_block
 .type sha256_multi_block_shaext,@function
 .align 32
 sha256_multi_block_shaext:
+.cfi_startproc
 _shaext_shortcut:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 shll $1,%edx
 andq $-256,%rsp
@@ -3102,14 +3116,19 @@ _shaext_shortcut:
 .Ldone_shaext:
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_shaext:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_multi_block_shaext,.-sha256_multi_block_shaext
 .type sha256_multi_block_avx,@function
 .align 32
 sha256_multi_block_avx:
+.cfi_startproc
 _avx_shortcut:
 shrq $32,%rcx
 cmpl $2,%edx
@@ -3120,11 +3139,15 @@ _avx_shortcut:
 .align 32
 .Lavx:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 subq $288,%rsp
 andq $-256,%rsp
 movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
 .Lbody_avx:
 leaq K256+128(%rip),%rbp
 leaq 256(%rsp),%rbx
@@ -5353,27 +5376,41 @@ _avx_shortcut:
 .Ldone_avx:
 movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
 vzeroupper
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_multi_block_avx,.-sha256_multi_block_avx
 .type sha256_multi_block_avx2,@function
 .align 32
 sha256_multi_block_avx2:
+.cfi_startproc
 _avx2_shortcut:
 movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
+.cfi_offset %r15,-56
 subq $576,%rsp
 andq $-256,%rsp
 movq %rax,544(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
 .Lbody_avx2:
 leaq K256+128(%rip),%rbp
 leaq 128(%rdi),%rdi
@@ -7738,16 +7775,25 @@ _avx2_shortcut:
 .Ldone_avx2:
 movq 544(%rsp),%rax
+.cfi_def_cfa %rax,8
 vzeroupper
 movq -48(%rax),%r15
+.cfi_restore %r15
 movq -40(%rax),%r14
+.cfi_restore %r14
 movq -32(%rax),%r13
+.cfi_restore %r13
 movq -24(%rax),%r12
+.cfi_restore %r12
 movq -16(%rax),%rbp
+.cfi_restore %rbp
 movq -8(%rax),%rbx
+.cfi_restore %rbx
 leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx2:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_multi_block_avx2,.-sha256_multi_block_avx2
 .align 256
 K256:
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-x86_64.s
index 8264a7dbdf..42b24df18e 100644
--- a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-x86_64.s
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha256-x86_64.s
@@ -5,6 +5,7 @@
 .type sha256_block_data_order,@function
 .align 16
 sha256_block_data_order:
+.cfi_startproc
 leaq OPENSSL_ia32cap_P(%rip),%r11
 movl 0(%r11),%r9d
 movl 4(%r11),%r10d
@@ -21,13 +22,20 @@ sha256_block_data_order:
 je .Lavx_shortcut
 testl $512,%r10d
 jnz .Lssse3_shortcut
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $64+32,%rsp
 leaq (%rsi,%rdx,4),%rdx
@@ -35,7 +43,8 @@ sha256_block_data_order:
 movq %rdi,64+0(%rsp)
 movq %rsi,64+8(%rsp)
 movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
 .Lprologue:
 movl 0(%rdi),%eax
@@ -1699,16 +1708,25 @@ sha256_block_data_order:
 movl %r11d,28(%rdi)
 jb .Lloop
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_block_data_order,.-sha256_block_data_order
 .align 64
 .type K256,@object
@@ -1963,14 +1981,22 @@ _shaext_shortcut:
 .type sha256_block_data_order_ssse3,@function
 .align 64
 sha256_block_data_order_ssse3:
+.cfi_startproc
 .Lssse3_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $96,%rsp
 leaq (%rsi,%rdx,4),%rdx
@@ -1978,7 +2004,8 @@ sha256_block_data_order_ssse3:
 movq %rdi,64+0(%rsp)
 movq %rsi,64+8(%rsp)
 movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
 .Lprologue_ssse3:
 movl 0(%rdi),%eax
@@ -3044,28 +3071,45 @@ sha256_block_data_order_ssse3:
 movl %r11d,28(%rdi)
 jb .Lloop_ssse3
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_ssse3:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
 .type sha256_block_data_order_avx,@function
 .align 64
 sha256_block_data_order_avx:
+.cfi_startproc
 .Lavx_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $96,%rsp
 leaq (%rsi,%rdx,4),%rdx
@@ -3073,7 +3117,8 @@ sha256_block_data_order_avx:
 movq %rdi,64+0(%rsp)
 movq %rsi,64+8(%rsp)
 movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
 .Lprologue_avx:
 vzeroupper
@@ -4100,29 +4145,46 @@ sha256_block_data_order_avx:
 movl %r11d,28(%rdi)
 jb .Lloop_avx
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_block_data_order_avx,.-sha256_block_data_order_avx
 .type sha256_block_data_order_avx2,@function
 .align 64
 sha256_block_data_order_avx2:
+.cfi_startproc
 .Lavx2_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 subq $544,%rsp
 shlq $4,%rdx
 andq $-1024,%rsp
@@ -4131,7 +4193,8 @@ sha256_block_data_order_avx2:
 movq %rdi,64+0(%rsp)
 movq %rsi,64+8(%rsp)
 movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
 .Lprologue_avx2:
 vzeroupper
@@ -5344,15 +5407,24 @@ sha256_block_data_order_avx2:
 .Ldone_avx2:
 leaq (%rbp),%rsp
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx2:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2
diff --git a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha512-x86_64.s b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha512-x86_64.s
index 6f8488a38a..5931a2a932 100644
--- a/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha512-x86_64.s
+++ b/deps/openssl/config/archs/linux-x86_64/asm/crypto/sha/sha512-x86_64.s
@@ -5,6 +5,7 @@
 .type sha512_block_data_order,@function
 .align 16
 sha512_block_data_order:
+.cfi_startproc
 leaq OPENSSL_ia32cap_P(%rip),%r11
 movl 0(%r11),%r9d
 movl 4(%r11),%r10d
@@ -19,13 +20,20 @@ sha512_block_data_order:
 orl %r9d,%r10d
 cmpl $1342177792,%r10d
 je .Lavx_shortcut
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $128+32,%rsp
 leaq (%rsi,%rdx,8),%rdx
@@ -33,7 +41,8 @@ sha512_block_data_order:
 movq %rdi,128+0(%rsp)
 movq %rsi,128+8(%rsp)
 movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
 .Lprologue:
 movq 0(%rdi),%rax
@@ -1697,16 +1706,25 @@ sha512_block_data_order:
 movq %r11,56(%rdi)
 jb .Lloop
- movq 128+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha512_block_data_order,.-sha512_block_data_order
 .align 64
 .type K512,@object
@@ -1798,14 +1816,22 @@ K512:
 .type sha512_block_data_order_xop,@function
 .align 64
 sha512_block_data_order_xop:
+.cfi_startproc
 .Lxop_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $160,%rsp
 leaq (%rsi,%rdx,8),%rdx
@@ -1813,7 +1839,8 @@ sha512_block_data_order_xop:
 movq %rdi,128+0(%rsp)
 movq %rsi,128+8(%rsp)
 movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
 .Lprologue_xop:
 vzeroupper
@@ -2866,29 +2893,46 @@ sha512_block_data_order_xop:
 movq %r11,56(%rdi)
 jb .Lloop_xop
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_xop:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha512_block_data_order_xop,.-sha512_block_data_order_xop
 .type sha512_block_data_order_avx,@function
 .align 64
 sha512_block_data_order_avx:
+.cfi_startproc
 .Lavx_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 shlq $4,%rdx
 subq $160,%rsp
 leaq (%rsi,%rdx,8),%rdx
@@ -2896,7 +2940,8 @@ sha512_block_data_order_avx:
 movq %rdi,128+0(%rsp)
 movq %rsi,128+8(%rsp)
 movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
 .Lprologue_avx:
 vzeroupper
@@ -4013,29 +4058,46 @@ sha512_block_data_order_avx:
 movq %r11,56(%rdi)
 jb .Lloop_avx
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha512_block_data_order_avx,.-sha512_block_data_order_avx
 .type sha512_block_data_order_avx2,@function
 .align 64
 sha512_block_data_order_avx2:
+.cfi_startproc
 .Lavx2_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
 pushq %rbx
+.cfi_offset %rbx,-16
 pushq %rbp
+.cfi_offset %rbp,-24
 pushq %r12
+.cfi_offset %r12,-32
 pushq %r13
+.cfi_offset %r13,-40
 pushq %r14
+.cfi_offset %r14,-48
 pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
 subq $1312,%rsp
 shlq $4,%rdx
 andq $-2048,%rsp
@@ -4044,7 +4106,8 @@ sha512_block_data_order_avx2:
 movq %rdi,128+0(%rsp)
 movq %rsi,128+8(%rsp)
 movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
 .Lprologue_avx2:
 vzeroupper
@@ -5351,15 +5414,24 @@ sha512_block_data_order_avx2:
 .Ldone_avx2:
 leaq (%rbp),%rsp
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
 vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
 .Lepilogue_avx2:
 .byte 0xf3,0xc3
+.cfi_endproc
 .size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2
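
Usage note: the new keccak1600-x86_64.s above exports SHA3_absorb and SHA3_squeeze, which OpenSSL's SHA-3/SHAKE code drives. Below is a minimal caller sketch in C, assuming the OpenSSL-internal prototypes (a 5x5 array of 64-bit lanes as state, rate r in bytes, SHA3_absorb returning the count of unprocessed tail bytes); sha3_256_oneshot and the FIPS 202 padding steps are illustrative, not code from this diff.

#include <stdint.h>
#include <string.h>

/* Prototypes of the routines defined in keccak1600-x86_64.s (assumed
 * OpenSSL-internal interface; there is no public header for them). */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r);
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r);

/* Illustrative one-shot SHA3-256: rate r = 136 bytes, pad byte 0x06. */
static void sha3_256_oneshot(unsigned char md[32],
                             const unsigned char *msg, size_t len)
{
    uint64_t A[5][5];
    unsigned char block[136];
    size_t rem;

    memset(A, 0, sizeof(A));

    /* Absorb all full rate-sized blocks; the return value is the
     * number of leftover tail bytes (< r). */
    rem = SHA3_absorb(A, msg, len, sizeof(block));

    /* Pad the tail per FIPS 202 (0x06 ... 0x80) and absorb the final block. */
    memset(block, 0, sizeof(block));
    memcpy(block, msg + (len - rem), rem);
    block[rem] = 0x06;
    block[sizeof(block) - 1] |= 0x80;
    SHA3_absorb(A, block, sizeof(block), sizeof(block));

    /* Squeeze 32 bytes of digest out of the state. */
    SHA3_squeeze(A, md, 32, sizeof(block));
}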