From 807ed7883a12423270450776f015a7c2348c0913 Mon Sep 17 00:00:00 2001 From: Sam Roberts Date: Thu, 22 Nov 2018 11:47:07 -0800 Subject: deps: update archs files for OpenSSL-1.1.1a `cd deps/openssl/config; make` updates all archs dependant files. PR-URL: https://github.com/nodejs/node/pull/25381 Reviewed-By: Daniel Bevenius Reviewed-By: Shigeki Ohtsu --- .../asm_avx2/crypto/aes/aesni-sha256-x86_64.s | 4409 ++++++++++++++++++++ 1 file changed, 4409 insertions(+) create mode 100644 deps/openssl/config/archs/solaris64-x86_64-gcc/asm_avx2/crypto/aes/aesni-sha256-x86_64.s (limited to 'deps/openssl/config/archs/solaris64-x86_64-gcc/asm_avx2/crypto/aes/aesni-sha256-x86_64.s') diff --git a/deps/openssl/config/archs/solaris64-x86_64-gcc/asm_avx2/crypto/aes/aesni-sha256-x86_64.s b/deps/openssl/config/archs/solaris64-x86_64-gcc/asm_avx2/crypto/aes/aesni-sha256-x86_64.s new file mode 100644 index 0000000000..5a47b3ee51 --- /dev/null +++ b/deps/openssl/config/archs/solaris64-x86_64-gcc/asm_avx2/crypto/aes/aesni-sha256-x86_64.s @@ -0,0 +1,4409 @@ +.text + + +.globl aesni_cbc_sha256_enc +.type aesni_cbc_sha256_enc,@function +.align 16 +aesni_cbc_sha256_enc: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl $1,%eax + cmpq $0,%rdi + je .Lprobe + movl 0(%r11),%eax + movq 4(%r11),%r10 + btq $61,%r10 + jc aesni_cbc_sha256_enc_shaext + movq %r10,%r11 + shrq $32,%r11 + + testl $2048,%r10d + jnz aesni_cbc_sha256_enc_xop + andl $296,%r11d + cmpl $296,%r11d + je aesni_cbc_sha256_enc_avx2 + andl $268435456,%r10d + jnz aesni_cbc_sha256_enc_avx + ud2 + xorl %eax,%eax + cmpq $0,%rdi + je .Lprobe + ud2 +.Lprobe: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc + +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.type aesni_cbc_sha256_enc_xop,@function +.align 64 +aesni_cbc_sha256_enc_xop: +.cfi_startproc +.Lxop_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_xop: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm2,%xmm3,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm0,%xmm0 + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,251,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm3,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm0,%xmm0 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,248,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm0,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm0,%xmm0 + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm3,%xmm0,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm1,%xmm1 + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,248,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm0,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm1,%xmm1 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,249,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm1,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm1,%xmm1 + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm0,%xmm1,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm2,%xmm2 + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,249,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm1,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm2,%xmm2 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,250,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm2,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm2,%xmm2 + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm1,%xmm2,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm3,%xmm3 + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,250,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm2,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm3,%xmm3 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,251,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm3,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm3,%xmm3 + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lxop_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jb .Lloop_xop + + movq 64+32(%rsp),%r8 + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop +.type aesni_cbc_sha256_enc_avx,@function +.align 64 +aesni_cbc_sha256_enc_avx: +.cfi_startproc +.Lavx_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_avx: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm3,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpaddd %xmm6,%xmm0,%xmm0 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm0,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 0(%rbp),%xmm0,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm0,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpaddd %xmm6,%xmm1,%xmm1 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm1,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 32(%rbp),%xmm1,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm1,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpaddd %xmm6,%xmm2,%xmm2 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm2,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 64(%rbp),%xmm2,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm2,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpaddd %xmm6,%xmm3,%xmm3 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm3,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 96(%rbp),%xmm3,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lavx_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + jb .Lloop_avx + + movq 64+32(%rsp),%r8 + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx +.type aesni_cbc_sha256_enc_avx2,@function +.align 64 +aesni_cbc_sha256_enc_avx2: +.cfi_startproc +.Lavx2_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $576,%rsp + andq $-1024,%rsp + addq $448,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_avx2: + vzeroall + + movq %rdi,%r13 + vpinsrq $1,%rsi,%xmm15,%xmm15 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r12 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + leaq -9(%r14),%r14 + + vmovdqa 0(%r12,%r14,8),%xmm14 + vmovdqa 16(%r12,%r14,8),%xmm13 + vmovdqa 32(%r12,%r14,8),%xmm12 + + subq $-64,%r13 + movl 0(%r15),%eax + leaq (%rsi,%r13,1),%r12 + movl 4(%r15),%ebx + cmpq %rdx,%r13 + movl 8(%r15),%ecx + cmoveq %rsp,%r12 + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + vmovdqu 0-128(%rdi),%xmm10 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi,%r13,1),%xmm0 + vmovdqu -64+16(%rsi,%r13,1),%xmm1 + vmovdqu -64+32(%rsi,%r13,1),%xmm2 + vmovdqu -64+48(%rsi,%r13,1),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + leaq -64(%r13),%r13 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%esi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%esi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm0,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm0,%ymm0 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 0(%rbp),%ymm0,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm1,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm1,%ymm1 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 32(%rbp),%ymm1,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm2,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm2,%ymm2 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 64(%rbp),%ymm2,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm3,%ymm7 + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm3,%ymm3 + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 96(%rbp),%ymm3,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vpextrq $1,%xmm15,%r12 + vmovq %xmm15,%r13 + movq 552(%rsp),%r15 + addl %r14d,%eax + leaq 448(%rsp),%rbp + + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r13),%r13 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + cmpq 80(%rbp),%r13 + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%esi + movl %r9d,%r12d + xorl %ecx,%esi + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + leaq -64(%rbp),%rbp + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 552(%rsp),%r15 + leaq 64(%r13),%r13 + movq 560(%rsp),%rsi + addl %r14d,%eax + leaq 448(%rsp),%rsp + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + leaq (%rsi,%r13,1),%r12 + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r13 + + movl %eax,0(%r15) + cmoveq %rsp,%r12 + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 64+32(%rsp),%r8 + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2 +.type aesni_cbc_sha256_enc_shaext,@function +.align 32 +aesni_cbc_sha256_enc_shaext: + movq 8(%rsp),%r10 + leaq K256+128(%rip),%rax + movdqu (%r9),%xmm1 + movdqu 16(%r9),%xmm2 + movdqa 512-128(%rax),%xmm3 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups (%r8),%xmm6 + movups 16(%rcx),%xmm4 + leaq 112(%rcx),%rcx + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%r10),%xmm10 + movdqu 16(%r10),%xmm11 + movdqu 32(%r10),%xmm12 +.byte 102,68,15,56,0,211 + movdqu 48(%r10),%xmm13 + + movdqa 0-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 102,68,15,56,0,219 + movdqa %xmm2,%xmm9 + movdqa %xmm1,%xmm8 + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 32-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 102,68,15,56,0,227 + leaq 64(%r10),%r10 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 64-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 102,68,15,56,0,235 +.byte 69,15,56,204,211 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 96-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 +.byte 15,56,203,202 + movdqa 128-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + cmpl $11,%r11d + jb .Laesenclast1 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast1 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast1: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,0(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 160-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 192-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 224-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 256-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast2 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast2 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast2: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,16(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 288-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 320-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 352-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 384-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 416-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + cmpl $11,%r11d + jb .Laesenclast3 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast3 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast3: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,32(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 448-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 + movdqa %xmm7,%xmm3 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 480-128(%rax),%xmm0 + paddd %xmm13,%xmm0 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast4 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast4 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast4: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop + + paddd %xmm9,%xmm2 + paddd %xmm8,%xmm1 + + decq %rdx + movups %xmm6,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm3 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,211,8 + + movups %xmm6,(%r8) + movdqu %xmm1,(%r9) + movdqu %xmm2,16(%r9) + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext -- cgit v1.2.3