diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl')
-rw-r--r-- | deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl | 56 |
1 files changed, 51 insertions, 5 deletions
diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl index fbcd29f2e8..73978dbd81 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl @@ -36,7 +36,7 @@ # (iii) "this" is for n=8, when we gather twice as much data, result # for n=4 is 20.3+4.44=24.7; # (iv) presented improvement coefficients are asymptotic limits and -# in real-life application are somewhat lower, e.g. for 2KB +# in real-life application are somewhat lower, e.g. for 2KB # fragments they range from 75% to 130% (on Haswell); $flavour = shift; @@ -244,6 +244,7 @@ $code.=<<___; .type sha256_multi_block,\@function,3 .align 32 sha256_multi_block: +.cfi_startproc mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut @@ -254,8 +255,11 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -274,6 +278,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -391,6 +396,7 @@ $code.=<<___; .Ldone: mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 @@ -406,10 +412,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue: ret +.cfi_endproc .size sha256_multi_block,.-sha256_multi_block ___ {{{ @@ -421,10 +431,14 @@ $code.=<<___; .type sha256_multi_block_shaext,\@function,3 .align 32 sha256_multi_block_shaext: +.cfi_startproc _shaext_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -758,10 +772,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_shaext: ret +.cfi_endproc .size sha256_multi_block_shaext,.-sha256_multi_block_shaext ___ }}} @@ -921,6 +939,7 @@ $code.=<<___; .type sha256_multi_block_avx,\@function,3 .align 32 sha256_multi_block_avx: +.cfi_startproc _avx_shortcut: ___ $code.=<<___ if ($avx>1); @@ -935,8 +954,11 @@ $code.=<<___ if ($avx>1); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -955,6 +977,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -1070,6 +1093,7 @@ $code.=<<___; .Ldone_avx: mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1086,10 +1110,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx: ret +.cfi_endproc .size sha256_multi_block_avx,.-sha256_multi_block_avx ___ if ($avx>1) { @@ -1105,14 +1133,22 @@ $code.=<<___; .type sha256_multi_block_avx2,\@function,3 .align 32 sha256_multi_block_avx2: +.cfi_startproc _avx2_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1131,6 +1167,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx2: lea K256+128(%rip),$Tbl lea 0x80($ctx),$ctx # size optimization @@ -1246,6 +1283,7 @@ $code.=<<___; .Ldone_avx2: mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1262,14 +1300,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx2: ret +.cfi_endproc .size sha256_multi_block_avx2,.-sha256_multi_block_avx2 ___ } }}} @@ -1462,10 +1508,10 @@ avx2_handler: mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore cotnext->R12 - mov %r13,224($context) # restore cotnext->R13 - mov %r14,232($context) # restore cotnext->R14 - mov %r15,240($context) # restore cotnext->R15 + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 |