.text

.globl	_aesni_cbc_sha256_enc

.p2align	4
_aesni_cbc_sha256_enc:
	leaq	_OPENSSL_ia32cap_P(%rip),%r11
	movl	$1,%eax
	cmpq	$0,%rdi
	je	L$probe
	movl	0(%r11),%eax
	movq	4(%r11),%r10

	btq	$61,%r10
	jc	aesni_cbc_sha256_enc_shaext
	movq	%r10,%r11
	shrq	$32,%r11

	testl	$2048,%r10d
	jnz	aesni_cbc_sha256_enc_xop
	andl	$296,%r11d
	cmpl	$296,%r11d
	je	aesni_cbc_sha256_enc_avx2
	andl	$268435456,%r10d
	jnz	aesni_cbc_sha256_enc_avx
	ud2
	xorl	%eax,%eax
	cmpq	$0,%rdi
	je	L$probe
	ud2
L$probe:
	.byte	0xf3,0xc3


.p2align	6
K256:
.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long	0,0,0,0,   0,0,0,0,   -1,-1,-1,-1
.long	0,0,0,0,   0,0,0,0
.byte	65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6

.p2align	6
aesni_cbc_sha256_enc_xop:
L$xop_shortcut:
	movq	8(%rsp),%r10
	movq	%rsp,%rax
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$128,%rsp
	andq	$-64,%rsp
	shlq	$6,%rdx
	subq	%rdi,%rsi
	subq	%rdi,%r10
	addq	%rdi,%rdx
	movq	%rsi,64+8(%rsp)
	movq	%rdx,64+16(%rsp)
	movq	%r8,64+32(%rsp)
	movq	%r9,64+40(%rsp)
	movq	%r10,64+48(%rsp)
	movq	%rax,120(%rsp)
L$prologue_xop:
	vzeroall
	movq	%rdi,%r12
	leaq	128(%rcx),%rdi
	leaq	K256+544(%rip),%r13
	movl	240-128(%rdi),%r14d
	movq	%r9,%r15
	movq	%r10,%rsi
	vmovdqu	(%r8),%xmm8
	subq	$9,%r14
	movl	0(%r15),%eax
	movl	4(%r15),%ebx
	movl	8(%r15),%ecx
	movl	12(%r15),%edx
	movl	16(%r15),%r8d
	movl	20(%r15),%r9d
	movl	24(%r15),%r10d
	movl	28(%r15),%r11d
	vmovdqa	0(%r13,%r14,8),%xmm14
	vmovdqa	16(%r13,%r14,8),%xmm13
	vmovdqa	32(%r13,%r14,8),%xmm12
	vmovdqu	0-128(%rdi),%xmm10
	jmp	L$loop_xop
.p2align	4
L$loop_xop:
	vmovdqa	K256+512(%rip),%xmm7
	vmovdqu	0(%rsi,%r12,1),%xmm0
	vmovdqu	16(%rsi,%r12,1),%xmm1
	vmovdqu	32(%rsi,%r12,1),%xmm2
	vmovdqu	48(%rsi,%r12,1),%xmm3
	vpshufb	%xmm7,%xmm0,%xmm0
	leaq	K256(%rip),%rbp
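
# Overview (informal annotation, inferred from the instruction stream, not
# from the original perlasm commentary): L$loop_xop hashes one 64-byte block
# per iteration. The vpshufb with the mask kept at K256+512 byte-swaps the
# message words, and the first four vpaddd stage W[0..15]+K[0..15] on the
# stack. AES-CBC encryption is stitched into the SHA-256 rounds: a 16-byte
# block is loaded into %xmm9, xored with round key 0 (%xmm10) and the
# chaining value (%xmm8), and carried through vaesenc steps interleaved with
# each group of 16 rounds, so 64 bytes are encrypted per 64 bytes hashed.
# The ".byte 143,232,120,194,..." sequences below appear to be XOP vprotd
# rotates emitted as raw bytes (0x8f is the XOP escape byte), presumably for
# assemblers that do not know the mnemonic.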
vpshufb %xmm7,%xmm1,%xmm1 vpshufb %xmm7,%xmm2,%xmm2 vpaddd 0(%rbp),%xmm0,%xmm4 vpshufb %xmm7,%xmm3,%xmm3 vpaddd 32(%rbp),%xmm1,%xmm5 vpaddd 64(%rbp),%xmm2,%xmm6 vpaddd 96(%rbp),%xmm3,%xmm7 vmovdqa %xmm4,0(%rsp) movl %eax,%r14d vmovdqa %xmm5,16(%rsp) movl %ebx,%esi vmovdqa %xmm6,32(%rsp) xorl %ecx,%esi vmovdqa %xmm7,48(%rsp) movl %r8d,%r13d jmp L$xop_00_47 .p2align 4 L$xop_00_47: subq $-32*4,%rbp vmovdqu (%r12),%xmm9 movq %r12,64+0(%rsp) vpalignr $4,%xmm0,%xmm1,%xmm4 rorl $14,%r13d movl %r14d,%eax vpalignr $4,%xmm2,%xmm3,%xmm7 movl %r9d,%r12d xorl %r8d,%r13d .byte 143,232,120,194,236,14 rorl $9,%r14d xorl %r10d,%r12d vpsrld $3,%xmm4,%xmm4 rorl $5,%r13d xorl %eax,%r14d vpaddd %xmm7,%xmm0,%xmm0 andl %r8d,%r12d vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r8d,%r13d addl 0(%rsp),%r11d movl %eax,%r15d .byte 143,232,120,194,245,11 rorl $11,%r14d xorl %r10d,%r12d vpxor %xmm5,%xmm4,%xmm4 xorl %ebx,%r15d rorl $6,%r13d addl %r12d,%r11d andl %r15d,%esi .byte 143,232,120,194,251,13 xorl %eax,%r14d addl %r13d,%r11d vpxor %xmm6,%xmm4,%xmm4 xorl %ebx,%esi addl %r11d,%edx vpsrld $10,%xmm3,%xmm6 rorl $2,%r14d addl %esi,%r11d vpaddd %xmm4,%xmm0,%xmm0 movl %edx,%r13d addl %r11d,%r14d .byte 143,232,120,194,239,2 rorl $14,%r13d movl %r14d,%r11d vpxor %xmm6,%xmm7,%xmm7 movl %r8d,%r12d xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%r12d vpxor %xmm5,%xmm7,%xmm7 rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d vpxor %xmm8,%xmm9,%xmm9 xorl %edx,%r13d vpsrldq $8,%xmm7,%xmm7 addl 4(%rsp),%r10d movl %r11d,%esi rorl $11,%r14d xorl %r9d,%r12d vpaddd %xmm7,%xmm0,%xmm0 xorl %eax,%esi rorl $6,%r13d addl %r12d,%r10d andl %esi,%r15d .byte 143,232,120,194,248,13 xorl %r11d,%r14d addl %r13d,%r10d vpsrld $10,%xmm0,%xmm6 xorl %eax,%r15d addl %r10d,%ecx .byte 143,232,120,194,239,2 rorl $2,%r14d addl %r15d,%r10d vpxor %xmm6,%xmm7,%xmm7 movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d vpxor %xmm5,%xmm7,%xmm7 movl %edx,%r12d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r12d vpslldq $8,%xmm7,%xmm7 rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %ecx,%r13d vpaddd %xmm7,%xmm0,%xmm0 addl 8(%rsp),%r9d movl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r12d vpaddd 0(%rbp),%xmm0,%xmm6 xorl %r11d,%r15d rorl $6,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx rorl $2,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %ebx,%r13d addl 12(%rsp),%r8d movl %r9d,%esi rorl $11,%r14d xorl %edx,%r12d xorl %r10d,%esi rorl $6,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax rorl $2,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d vmovdqa %xmm6,0(%rsp) vpalignr $4,%xmm1,%xmm2,%xmm4 rorl $14,%r13d movl %r14d,%r8d vpalignr $4,%xmm3,%xmm0,%xmm7 movl %ebx,%r12d xorl %eax,%r13d .byte 143,232,120,194,236,14 rorl $9,%r14d xorl %ecx,%r12d vpsrld $3,%xmm4,%xmm4 rorl $5,%r13d xorl %r8d,%r14d vpaddd %xmm7,%xmm1,%xmm1 andl %eax,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %eax,%r13d addl 16(%rsp),%edx movl %r8d,%r15d .byte 143,232,120,194,245,11 rorl $11,%r14d xorl %ecx,%r12d vpxor %xmm5,%xmm4,%xmm4 xorl %r9d,%r15d rorl $6,%r13d addl %r12d,%edx andl %r15d,%esi .byte 143,232,120,194,248,13 xorl %r8d,%r14d addl %r13d,%edx vpxor %xmm6,%xmm4,%xmm4 xorl %r9d,%esi addl 
%edx,%r11d vpsrld $10,%xmm0,%xmm6 rorl $2,%r14d addl %esi,%edx vpaddd %xmm4,%xmm1,%xmm1 movl %r11d,%r13d addl %edx,%r14d .byte 143,232,120,194,239,2 rorl $14,%r13d movl %r14d,%edx vpxor %xmm6,%xmm7,%xmm7 movl %eax,%r12d xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%r12d vpxor %xmm5,%xmm7,%xmm7 rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r11d,%r13d vpsrldq $8,%xmm7,%xmm7 addl 20(%rsp),%ecx movl %edx,%esi rorl $11,%r14d xorl %ebx,%r12d vpaddd %xmm7,%xmm1,%xmm1 xorl %r8d,%esi rorl $6,%r13d addl %r12d,%ecx andl %esi,%r15d .byte 143,232,120,194,249,13 xorl %edx,%r14d addl %r13d,%ecx vpsrld $10,%xmm1,%xmm6 xorl %r8d,%r15d addl %ecx,%r10d .byte 143,232,120,194,239,2 rorl $2,%r14d addl %r15d,%ecx vpxor %xmm6,%xmm7,%xmm7 movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx vpxor %xmm5,%xmm7,%xmm7 movl %r11d,%r12d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r12d vpslldq $8,%xmm7,%xmm7 rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r10d,%r13d vpaddd %xmm7,%xmm1,%xmm1 addl 24(%rsp),%ebx movl %ecx,%r15d rorl $11,%r14d xorl %eax,%r12d vpaddd 32(%rbp),%xmm1,%xmm6 xorl %edx,%r15d rorl $6,%r13d addl %r12d,%ebx andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d rorl $2,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d andl %r9d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r9d,%r13d addl 28(%rsp),%eax movl %ebx,%esi rorl $11,%r14d xorl %r11d,%r12d xorl %ecx,%esi rorl $6,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d rorl $2,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d vmovdqa %xmm6,16(%rsp) vpalignr $4,%xmm2,%xmm3,%xmm4 rorl $14,%r13d movl %r14d,%eax vpalignr $4,%xmm0,%xmm1,%xmm7 movl %r9d,%r12d xorl %r8d,%r13d .byte 143,232,120,194,236,14 rorl $9,%r14d xorl %r10d,%r12d vpsrld $3,%xmm4,%xmm4 rorl $5,%r13d xorl %eax,%r14d vpaddd %xmm7,%xmm2,%xmm2 andl %r8d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r8d,%r13d addl 32(%rsp),%r11d movl %eax,%r15d .byte 143,232,120,194,245,11 rorl $11,%r14d xorl %r10d,%r12d vpxor %xmm5,%xmm4,%xmm4 xorl %ebx,%r15d rorl $6,%r13d addl %r12d,%r11d andl %r15d,%esi .byte 143,232,120,194,249,13 xorl %eax,%r14d addl %r13d,%r11d vpxor %xmm6,%xmm4,%xmm4 xorl %ebx,%esi addl %r11d,%edx vpsrld $10,%xmm1,%xmm6 rorl $2,%r14d addl %esi,%r11d vpaddd %xmm4,%xmm2,%xmm2 movl %edx,%r13d addl %r11d,%r14d .byte 143,232,120,194,239,2 rorl $14,%r13d movl %r14d,%r11d vpxor %xmm6,%xmm7,%xmm7 movl %r8d,%r12d xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%r12d vpxor %xmm5,%xmm7,%xmm7 rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %edx,%r13d vpsrldq $8,%xmm7,%xmm7 addl 36(%rsp),%r10d movl %r11d,%esi rorl $11,%r14d xorl %r9d,%r12d vpaddd %xmm7,%xmm2,%xmm2 xorl %eax,%esi rorl $6,%r13d addl %r12d,%r10d andl %esi,%r15d .byte 143,232,120,194,250,13 xorl %r11d,%r14d addl %r13d,%r10d vpsrld $10,%xmm2,%xmm6 xorl %eax,%r15d addl %r10d,%ecx .byte 143,232,120,194,239,2 rorl $2,%r14d addl %r15d,%r10d vpxor %xmm6,%xmm7,%xmm7 movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d vpxor %xmm5,%xmm7,%xmm7 movl %edx,%r12d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r12d vpslldq $8,%xmm7,%xmm7 rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 
160-128(%rdi),%xmm10 xorl %ecx,%r13d vpaddd %xmm7,%xmm2,%xmm2 addl 40(%rsp),%r9d movl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r12d vpaddd 64(%rbp),%xmm2,%xmm6 xorl %r11d,%r15d rorl $6,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx rorl $2,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %ebx,%r13d addl 44(%rsp),%r8d movl %r9d,%esi rorl $11,%r14d xorl %edx,%r12d xorl %r10d,%esi rorl $6,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax rorl $2,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d vmovdqa %xmm6,32(%rsp) vpalignr $4,%xmm3,%xmm0,%xmm4 rorl $14,%r13d movl %r14d,%r8d vpalignr $4,%xmm1,%xmm2,%xmm7 movl %ebx,%r12d xorl %eax,%r13d .byte 143,232,120,194,236,14 rorl $9,%r14d xorl %ecx,%r12d vpsrld $3,%xmm4,%xmm4 rorl $5,%r13d xorl %r8d,%r14d vpaddd %xmm7,%xmm3,%xmm3 andl %eax,%r12d vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %eax,%r13d addl 48(%rsp),%edx movl %r8d,%r15d .byte 143,232,120,194,245,11 rorl $11,%r14d xorl %ecx,%r12d vpxor %xmm5,%xmm4,%xmm4 xorl %r9d,%r15d rorl $6,%r13d addl %r12d,%edx andl %r15d,%esi .byte 143,232,120,194,250,13 xorl %r8d,%r14d addl %r13d,%edx vpxor %xmm6,%xmm4,%xmm4 xorl %r9d,%esi addl %edx,%r11d vpsrld $10,%xmm2,%xmm6 rorl $2,%r14d addl %esi,%edx vpaddd %xmm4,%xmm3,%xmm3 movl %r11d,%r13d addl %edx,%r14d .byte 143,232,120,194,239,2 rorl $14,%r13d movl %r14d,%edx vpxor %xmm6,%xmm7,%xmm7 movl %eax,%r12d xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%r12d vpxor %xmm5,%xmm7,%xmm7 rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r11d,%r13d vpsrldq $8,%xmm7,%xmm7 addl 52(%rsp),%ecx movl %edx,%esi rorl $11,%r14d xorl %ebx,%r12d vpaddd %xmm7,%xmm3,%xmm3 xorl %r8d,%esi rorl $6,%r13d addl %r12d,%ecx andl %esi,%r15d .byte 143,232,120,194,251,13 xorl %edx,%r14d addl %r13d,%ecx vpsrld $10,%xmm3,%xmm6 xorl %r8d,%r15d addl %ecx,%r10d .byte 143,232,120,194,239,2 rorl $2,%r14d addl %r15d,%ecx vpxor %xmm6,%xmm7,%xmm7 movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx vpxor %xmm5,%xmm7,%xmm7 movl %r11d,%r12d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r12d vpslldq $8,%xmm7,%xmm7 rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r10d,%r13d vpaddd %xmm7,%xmm3,%xmm3 addl 56(%rsp),%ebx movl %ecx,%r15d rorl $11,%r14d xorl %eax,%r12d vpaddd 96(%rbp),%xmm3,%xmm6 xorl %edx,%r15d rorl $6,%r13d addl %r12d,%ebx andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d rorl $2,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d andl %r9d,%r12d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r9d,%r13d addl 60(%rsp),%eax movl %ebx,%esi rorl $11,%r14d xorl %r11d,%r12d xorl %ecx,%esi rorl $6,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d rorl $2,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d vmovdqa %xmm6,48(%rsp) movq 64+0(%rsp),%r12 vpand %xmm14,%xmm11,%xmm11 movq 64+8(%rsp),%r15 vpor 
%xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r15,%r12,1) leaq 16(%r12),%r12 cmpb $0,131(%rbp) jne L$xop_00_47 vmovdqu (%r12),%xmm9 movq %r12,64+0(%rsp) rorl $14,%r13d movl %r14d,%eax movl %r9d,%r12d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d andl %r8d,%r12d vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r8d,%r13d addl 0(%rsp),%r11d movl %eax,%r15d rorl $11,%r14d xorl %r10d,%r12d xorl %ebx,%r15d rorl $6,%r13d addl %r12d,%r11d andl %r15d,%esi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi addl %r11d,%edx rorl $2,%r14d addl %esi,%r11d movl %edx,%r13d addl %r11d,%r14d rorl $14,%r13d movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%r12d rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d vpxor %xmm8,%xmm9,%xmm9 xorl %edx,%r13d addl 4(%rsp),%r10d movl %r11d,%esi rorl $11,%r14d xorl %r9d,%r12d xorl %eax,%esi rorl $6,%r13d addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx rorl $2,%r14d addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r12d rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %ecx,%r13d addl 8(%rsp),%r9d movl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r12d xorl %r11d,%r15d rorl $6,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx rorl $2,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %ebx,%r13d addl 12(%rsp),%r8d movl %r9d,%esi rorl $11,%r14d xorl %edx,%r12d xorl %r10d,%esi rorl $6,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax rorl $2,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d rorl $14,%r13d movl %r14d,%r8d movl %ebx,%r12d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d andl %eax,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %eax,%r13d addl 16(%rsp),%edx movl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r12d xorl %r9d,%r15d rorl $6,%r13d addl %r12d,%edx andl %r15d,%esi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi addl %edx,%r11d rorl $2,%r14d addl %esi,%edx movl %r11d,%r13d addl %edx,%r14d rorl $14,%r13d movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%r12d rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r11d,%r13d addl 20(%rsp),%ecx movl %edx,%esi rorl $11,%r14d xorl %ebx,%r12d xorl %r8d,%esi rorl $6,%r13d addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d rorl $2,%r14d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r12d rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r10d,%r13d addl 24(%rsp),%ebx movl %ecx,%r15d rorl $11,%r14d xorl %eax,%r12d xorl %edx,%r15d rorl $6,%r13d addl %r12d,%ebx andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d rorl $2,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d andl %r9d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 
112-128(%rdi),%xmm10 xorl %r9d,%r13d addl 28(%rsp),%eax movl %ebx,%esi rorl $11,%r14d xorl %r11d,%r12d xorl %ecx,%esi rorl $6,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d rorl $2,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d rorl $14,%r13d movl %r14d,%eax movl %r9d,%r12d xorl %r8d,%r13d rorl $9,%r14d xorl %r10d,%r12d rorl $5,%r13d xorl %eax,%r14d andl %r8d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r8d,%r13d addl 32(%rsp),%r11d movl %eax,%r15d rorl $11,%r14d xorl %r10d,%r12d xorl %ebx,%r15d rorl $6,%r13d addl %r12d,%r11d andl %r15d,%esi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi addl %r11d,%edx rorl $2,%r14d addl %esi,%r11d movl %edx,%r13d addl %r11d,%r14d rorl $14,%r13d movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d rorl $9,%r14d xorl %r9d,%r12d rorl $5,%r13d xorl %r11d,%r14d andl %edx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %edx,%r13d addl 36(%rsp),%r10d movl %r11d,%esi rorl $11,%r14d xorl %r9d,%r12d xorl %eax,%esi rorl $6,%r13d addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx rorl $2,%r14d addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d rorl $14,%r13d movl %r14d,%r10d movl %edx,%r12d xorl %ecx,%r13d rorl $9,%r14d xorl %r8d,%r12d rorl $5,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %ecx,%r13d addl 40(%rsp),%r9d movl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r12d xorl %r11d,%r15d rorl $6,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx rorl $2,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d rorl $14,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d rorl $9,%r14d xorl %edx,%r12d rorl $5,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %ebx,%r13d addl 44(%rsp),%r8d movl %r9d,%esi rorl $11,%r14d xorl %edx,%r12d xorl %r10d,%esi rorl $6,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax rorl $2,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d rorl $14,%r13d movl %r14d,%r8d movl %ebx,%r12d xorl %eax,%r13d rorl $9,%r14d xorl %ecx,%r12d rorl $5,%r13d xorl %r8d,%r14d andl %eax,%r12d vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %eax,%r13d addl 48(%rsp),%edx movl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r12d xorl %r9d,%r15d rorl $6,%r13d addl %r12d,%edx andl %r15d,%esi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi addl %edx,%r11d rorl $2,%r14d addl %esi,%edx movl %r11d,%r13d addl %edx,%r14d rorl $14,%r13d movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d rorl $9,%r14d xorl %ebx,%r12d rorl $5,%r13d xorl %edx,%r14d andl %r11d,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r11d,%r13d addl 52(%rsp),%ecx movl %edx,%esi rorl $11,%r14d xorl %ebx,%r12d xorl %r8d,%esi rorl $6,%r13d addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d rorl $2,%r14d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d rorl $14,%r13d movl %r14d,%ecx movl %r11d,%r12d xorl %r10d,%r13d rorl $9,%r14d xorl %eax,%r12d rorl $5,%r13d xorl %ecx,%r14d andl %r10d,%r12d vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r10d,%r13d addl 56(%rsp),%ebx movl %ecx,%r15d rorl $11,%r14d xorl %eax,%r12d xorl %edx,%r15d rorl $6,%r13d addl %r12d,%ebx andl %r15d,%esi 
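
# Annotation (inferred): the vaesenclast/vpand/vpor pattern above appears to
# resolve the AES key length without branching. vaesenclast is issued
# speculatively with the round-10, round-12 and round-14 keys, and the masks
# in %xmm12/%xmm13/%xmm14 (loaded in the prologue from K256+544, indexed by
# rounds-9) keep exactly one of the candidate final blocks; vpor merges the
# survivor into %xmm8, which is stored as ciphertext and reused as the next
# CBC chaining value.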
xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d rorl $2,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d rorl $14,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d rorl $9,%r14d xorl %r11d,%r12d rorl $5,%r13d xorl %ebx,%r14d andl %r9d,%r12d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r9d,%r13d addl 60(%rsp),%eax movl %ebx,%esi rorl $11,%r14d xorl %r11d,%r12d xorl %ecx,%esi rorl $6,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d rorl $2,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d movq 64+0(%rsp),%r12 movq 64+8(%rsp),%r13 movq 64+40(%rsp),%r15 movq 64+48(%rsp),%rsi vpand %xmm14,%xmm11,%xmm11 movl %r14d,%eax vpor %xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r12,%r13,1) leaq 16(%r12),%r12 addl 0(%r15),%eax addl 4(%r15),%ebx addl 8(%r15),%ecx addl 12(%r15),%edx addl 16(%r15),%r8d addl 20(%r15),%r9d addl 24(%r15),%r10d addl 28(%r15),%r11d cmpq 64+16(%rsp),%r12 movl %eax,0(%r15) movl %ebx,4(%r15) movl %ecx,8(%r15) movl %edx,12(%r15) movl %r8d,16(%r15) movl %r9d,20(%r15) movl %r10d,24(%r15) movl %r11d,28(%r15) jb L$loop_xop movq 64+32(%rsp),%r8 movq 120(%rsp),%rsi vmovdqu %xmm8,(%r8) vzeroall movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp L$epilogue_xop: .byte 0xf3,0xc3 .p2align 6 aesni_cbc_sha256_enc_avx: L$avx_shortcut: movq 8(%rsp),%r10 movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $128,%rsp andq $-64,%rsp shlq $6,%rdx subq %rdi,%rsi subq %rdi,%r10 addq %rdi,%rdx movq %rsi,64+8(%rsp) movq %rdx,64+16(%rsp) movq %r8,64+32(%rsp) movq %r9,64+40(%rsp) movq %r10,64+48(%rsp) movq %rax,120(%rsp) L$prologue_avx: vzeroall movq %rdi,%r12 leaq 128(%rcx),%rdi leaq K256+544(%rip),%r13 movl 240-128(%rdi),%r14d movq %r9,%r15 movq %r10,%rsi vmovdqu (%r8),%xmm8 subq $9,%r14 movl 0(%r15),%eax movl 4(%r15),%ebx movl 8(%r15),%ecx movl 12(%r15),%edx movl 16(%r15),%r8d movl 20(%r15),%r9d movl 24(%r15),%r10d movl 28(%r15),%r11d vmovdqa 0(%r13,%r14,8),%xmm14 vmovdqa 16(%r13,%r14,8),%xmm13 vmovdqa 32(%r13,%r14,8),%xmm12 vmovdqu 0-128(%rdi),%xmm10 jmp L$loop_avx .p2align 4 L$loop_avx: vmovdqa K256+512(%rip),%xmm7 vmovdqu 0(%rsi,%r12,1),%xmm0 vmovdqu 16(%rsi,%r12,1),%xmm1 vmovdqu 32(%rsi,%r12,1),%xmm2 vmovdqu 48(%rsi,%r12,1),%xmm3 vpshufb %xmm7,%xmm0,%xmm0 leaq K256(%rip),%rbp vpshufb %xmm7,%xmm1,%xmm1 vpshufb %xmm7,%xmm2,%xmm2 vpaddd 0(%rbp),%xmm0,%xmm4 vpshufb %xmm7,%xmm3,%xmm3 vpaddd 32(%rbp),%xmm1,%xmm5 vpaddd 64(%rbp),%xmm2,%xmm6 vpaddd 96(%rbp),%xmm3,%xmm7 vmovdqa %xmm4,0(%rsp) movl %eax,%r14d vmovdqa %xmm5,16(%rsp) movl %ebx,%esi vmovdqa %xmm6,32(%rsp) xorl %ecx,%esi vmovdqa %xmm7,48(%rsp) movl %r8d,%r13d jmp L$avx_00_47 .p2align 4 L$avx_00_47: subq $-32*4,%rbp vmovdqu (%r12),%xmm9 movq %r12,64+0(%rsp) vpalignr $4,%xmm0,%xmm1,%xmm4 shrdl $14,%r13d,%r13d movl %r14d,%eax movl %r9d,%r12d vpalignr $4,%xmm2,%xmm3,%xmm7 xorl %r8d,%r13d shrdl $9,%r14d,%r14d xorl %r10d,%r12d vpsrld $7,%xmm4,%xmm6 shrdl $5,%r13d,%r13d xorl %eax,%r14d andl %r8d,%r12d vpaddd %xmm7,%xmm0,%xmm0 vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r8d,%r13d addl 0(%rsp),%r11d movl %eax,%r15d vpsrld $3,%xmm4,%xmm7 shrdl $11,%r14d,%r14d xorl %r10d,%r12d xorl %ebx,%r15d vpslld $14,%xmm4,%xmm5 shrdl $6,%r13d,%r13d addl %r12d,%r11d andl %r15d,%esi vpxor %xmm6,%xmm7,%xmm4 xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi vpshufd $250,%xmm3,%xmm7 addl %r11d,%edx shrdl $2,%r14d,%r14d addl %esi,%r11d 
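
# Annotation (inferred): aesni_cbc_sha256_enc_avx follows the same layout as
# the XOP path, but plain AVX has neither vprotd nor rorx, so the sigma
# rotations are built from vpslld/vpsrld/vpsrlq/vpxor and the scalar rotates
# use shrdl with identical source and destination register.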
vpsrld $11,%xmm6,%xmm6 movl %edx,%r13d addl %r11d,%r14d shrdl $14,%r13d,%r13d vpxor %xmm5,%xmm4,%xmm4 movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d vpslld $11,%xmm5,%xmm5 shrdl $9,%r14d,%r14d xorl %r9d,%r12d shrdl $5,%r13d,%r13d vpxor %xmm6,%xmm4,%xmm4 xorl %r11d,%r14d andl %edx,%r12d vpxor %xmm8,%xmm9,%xmm9 xorl %edx,%r13d vpsrld $10,%xmm7,%xmm6 addl 4(%rsp),%r10d movl %r11d,%esi shrdl $11,%r14d,%r14d vpxor %xmm5,%xmm4,%xmm4 xorl %r9d,%r12d xorl %eax,%esi shrdl $6,%r13d,%r13d vpsrlq $17,%xmm7,%xmm7 addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d vpaddd %xmm4,%xmm0,%xmm0 addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx vpxor %xmm7,%xmm6,%xmm6 shrdl $2,%r14d,%r14d addl %r15d,%r10d movl %ecx,%r13d vpsrlq $2,%xmm7,%xmm7 addl %r10d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r10d vpxor %xmm7,%xmm6,%xmm6 movl %edx,%r12d xorl %ecx,%r13d shrdl $9,%r14d,%r14d vpshufd $132,%xmm6,%xmm6 xorl %r8d,%r12d shrdl $5,%r13d,%r13d xorl %r10d,%r14d vpsrldq $8,%xmm6,%xmm6 andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %ecx,%r13d addl 8(%rsp),%r9d vpaddd %xmm6,%xmm0,%xmm0 movl %r10d,%r15d shrdl $11,%r14d,%r14d xorl %r8d,%r12d vpshufd $80,%xmm0,%xmm7 xorl %r11d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r9d vpsrld $10,%xmm7,%xmm6 andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d vpsrlq $17,%xmm7,%xmm7 xorl %r11d,%esi addl %r9d,%ebx shrdl $2,%r14d,%r14d vpxor %xmm7,%xmm6,%xmm6 addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d vpsrlq $2,%xmm7,%xmm7 shrdl $14,%r13d,%r13d movl %r14d,%r9d movl %ecx,%r12d vpxor %xmm7,%xmm6,%xmm6 xorl %ebx,%r13d shrdl $9,%r14d,%r14d xorl %edx,%r12d vpshufd $232,%xmm6,%xmm6 shrdl $5,%r13d,%r13d xorl %r9d,%r14d andl %ebx,%r12d vpslldq $8,%xmm6,%xmm6 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %ebx,%r13d addl 12(%rsp),%r8d movl %r9d,%esi vpaddd %xmm6,%xmm0,%xmm0 shrdl $11,%r14d,%r14d xorl %edx,%r12d xorl %r10d,%esi vpaddd 0(%rbp),%xmm0,%xmm6 shrdl $6,%r13d,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax shrdl $2,%r14d,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d vmovdqa %xmm6,0(%rsp) vpalignr $4,%xmm1,%xmm2,%xmm4 shrdl $14,%r13d,%r13d movl %r14d,%r8d movl %ebx,%r12d vpalignr $4,%xmm3,%xmm0,%xmm7 xorl %eax,%r13d shrdl $9,%r14d,%r14d xorl %ecx,%r12d vpsrld $7,%xmm4,%xmm6 shrdl $5,%r13d,%r13d xorl %r8d,%r14d andl %eax,%r12d vpaddd %xmm7,%xmm1,%xmm1 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %eax,%r13d addl 16(%rsp),%edx movl %r8d,%r15d vpsrld $3,%xmm4,%xmm7 shrdl $11,%r14d,%r14d xorl %ecx,%r12d xorl %r9d,%r15d vpslld $14,%xmm4,%xmm5 shrdl $6,%r13d,%r13d addl %r12d,%edx andl %r15d,%esi vpxor %xmm6,%xmm7,%xmm4 xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi vpshufd $250,%xmm0,%xmm7 addl %edx,%r11d shrdl $2,%r14d,%r14d addl %esi,%edx vpsrld $11,%xmm6,%xmm6 movl %r11d,%r13d addl %edx,%r14d shrdl $14,%r13d,%r13d vpxor %xmm5,%xmm4,%xmm4 movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d vpslld $11,%xmm5,%xmm5 shrdl $9,%r14d,%r14d xorl %ebx,%r12d shrdl $5,%r13d,%r13d vpxor %xmm6,%xmm4,%xmm4 xorl %edx,%r14d andl %r11d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r11d,%r13d vpsrld $10,%xmm7,%xmm6 addl 20(%rsp),%ecx movl %edx,%esi shrdl $11,%r14d,%r14d vpxor %xmm5,%xmm4,%xmm4 xorl %ebx,%r12d xorl %r8d,%esi shrdl $6,%r13d,%r13d vpsrlq $17,%xmm7,%xmm7 addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d vpaddd %xmm4,%xmm1,%xmm1 addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d vpxor %xmm7,%xmm6,%xmm6 shrdl $2,%r14d,%r14d addl %r15d,%ecx movl %r10d,%r13d vpsrlq $2,%xmm7,%xmm7 
addl %ecx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ecx vpxor %xmm7,%xmm6,%xmm6 movl %r11d,%r12d xorl %r10d,%r13d shrdl $9,%r14d,%r14d vpshufd $132,%xmm6,%xmm6 xorl %eax,%r12d shrdl $5,%r13d,%r13d xorl %ecx,%r14d vpsrldq $8,%xmm6,%xmm6 andl %r10d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r10d,%r13d addl 24(%rsp),%ebx vpaddd %xmm6,%xmm1,%xmm1 movl %ecx,%r15d shrdl $11,%r14d,%r14d xorl %eax,%r12d vpshufd $80,%xmm1,%xmm7 xorl %edx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%ebx vpsrld $10,%xmm7,%xmm6 andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx vpsrlq $17,%xmm7,%xmm7 xorl %edx,%esi addl %ebx,%r9d shrdl $2,%r14d,%r14d vpxor %xmm7,%xmm6,%xmm6 addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d vpsrlq $2,%xmm7,%xmm7 shrdl $14,%r13d,%r13d movl %r14d,%ebx movl %r10d,%r12d vpxor %xmm7,%xmm6,%xmm6 xorl %r9d,%r13d shrdl $9,%r14d,%r14d xorl %r11d,%r12d vpshufd $232,%xmm6,%xmm6 shrdl $5,%r13d,%r13d xorl %ebx,%r14d andl %r9d,%r12d vpslldq $8,%xmm6,%xmm6 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r9d,%r13d addl 28(%rsp),%eax movl %ebx,%esi vpaddd %xmm6,%xmm1,%xmm1 shrdl $11,%r14d,%r14d xorl %r11d,%r12d xorl %ecx,%esi vpaddd 32(%rbp),%xmm1,%xmm6 shrdl $6,%r13d,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d shrdl $2,%r14d,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d vmovdqa %xmm6,16(%rsp) vpalignr $4,%xmm2,%xmm3,%xmm4 shrdl $14,%r13d,%r13d movl %r14d,%eax movl %r9d,%r12d vpalignr $4,%xmm0,%xmm1,%xmm7 xorl %r8d,%r13d shrdl $9,%r14d,%r14d xorl %r10d,%r12d vpsrld $7,%xmm4,%xmm6 shrdl $5,%r13d,%r13d xorl %eax,%r14d andl %r8d,%r12d vpaddd %xmm7,%xmm2,%xmm2 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r8d,%r13d addl 32(%rsp),%r11d movl %eax,%r15d vpsrld $3,%xmm4,%xmm7 shrdl $11,%r14d,%r14d xorl %r10d,%r12d xorl %ebx,%r15d vpslld $14,%xmm4,%xmm5 shrdl $6,%r13d,%r13d addl %r12d,%r11d andl %r15d,%esi vpxor %xmm6,%xmm7,%xmm4 xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi vpshufd $250,%xmm1,%xmm7 addl %r11d,%edx shrdl $2,%r14d,%r14d addl %esi,%r11d vpsrld $11,%xmm6,%xmm6 movl %edx,%r13d addl %r11d,%r14d shrdl $14,%r13d,%r13d vpxor %xmm5,%xmm4,%xmm4 movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d vpslld $11,%xmm5,%xmm5 shrdl $9,%r14d,%r14d xorl %r9d,%r12d shrdl $5,%r13d,%r13d vpxor %xmm6,%xmm4,%xmm4 xorl %r11d,%r14d andl %edx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %edx,%r13d vpsrld $10,%xmm7,%xmm6 addl 36(%rsp),%r10d movl %r11d,%esi shrdl $11,%r14d,%r14d vpxor %xmm5,%xmm4,%xmm4 xorl %r9d,%r12d xorl %eax,%esi shrdl $6,%r13d,%r13d vpsrlq $17,%xmm7,%xmm7 addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d vpaddd %xmm4,%xmm2,%xmm2 addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx vpxor %xmm7,%xmm6,%xmm6 shrdl $2,%r14d,%r14d addl %r15d,%r10d movl %ecx,%r13d vpsrlq $2,%xmm7,%xmm7 addl %r10d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r10d vpxor %xmm7,%xmm6,%xmm6 movl %edx,%r12d xorl %ecx,%r13d shrdl $9,%r14d,%r14d vpshufd $132,%xmm6,%xmm6 xorl %r8d,%r12d shrdl $5,%r13d,%r13d xorl %r10d,%r14d vpsrldq $8,%xmm6,%xmm6 andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %ecx,%r13d addl 40(%rsp),%r9d vpaddd %xmm6,%xmm2,%xmm2 movl %r10d,%r15d shrdl $11,%r14d,%r14d xorl %r8d,%r12d vpshufd $80,%xmm2,%xmm7 xorl %r11d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r9d vpsrld $10,%xmm7,%xmm6 andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d vpsrlq $17,%xmm7,%xmm7 xorl %r11d,%esi addl %r9d,%ebx shrdl $2,%r14d,%r14d vpxor %xmm7,%xmm6,%xmm6 addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d 
vpsrlq $2,%xmm7,%xmm7 shrdl $14,%r13d,%r13d movl %r14d,%r9d movl %ecx,%r12d vpxor %xmm7,%xmm6,%xmm6 xorl %ebx,%r13d shrdl $9,%r14d,%r14d xorl %edx,%r12d vpshufd $232,%xmm6,%xmm6 shrdl $5,%r13d,%r13d xorl %r9d,%r14d andl %ebx,%r12d vpslldq $8,%xmm6,%xmm6 vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %ebx,%r13d addl 44(%rsp),%r8d movl %r9d,%esi vpaddd %xmm6,%xmm2,%xmm2 shrdl $11,%r14d,%r14d xorl %edx,%r12d xorl %r10d,%esi vpaddd 64(%rbp),%xmm2,%xmm6 shrdl $6,%r13d,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax shrdl $2,%r14d,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d vmovdqa %xmm6,32(%rsp) vpalignr $4,%xmm3,%xmm0,%xmm4 shrdl $14,%r13d,%r13d movl %r14d,%r8d movl %ebx,%r12d vpalignr $4,%xmm1,%xmm2,%xmm7 xorl %eax,%r13d shrdl $9,%r14d,%r14d xorl %ecx,%r12d vpsrld $7,%xmm4,%xmm6 shrdl $5,%r13d,%r13d xorl %r8d,%r14d andl %eax,%r12d vpaddd %xmm7,%xmm3,%xmm3 vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %eax,%r13d addl 48(%rsp),%edx movl %r8d,%r15d vpsrld $3,%xmm4,%xmm7 shrdl $11,%r14d,%r14d xorl %ecx,%r12d xorl %r9d,%r15d vpslld $14,%xmm4,%xmm5 shrdl $6,%r13d,%r13d addl %r12d,%edx andl %r15d,%esi vpxor %xmm6,%xmm7,%xmm4 xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi vpshufd $250,%xmm2,%xmm7 addl %edx,%r11d shrdl $2,%r14d,%r14d addl %esi,%edx vpsrld $11,%xmm6,%xmm6 movl %r11d,%r13d addl %edx,%r14d shrdl $14,%r13d,%r13d vpxor %xmm5,%xmm4,%xmm4 movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d vpslld $11,%xmm5,%xmm5 shrdl $9,%r14d,%r14d xorl %ebx,%r12d shrdl $5,%r13d,%r13d vpxor %xmm6,%xmm4,%xmm4 xorl %edx,%r14d andl %r11d,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r11d,%r13d vpsrld $10,%xmm7,%xmm6 addl 52(%rsp),%ecx movl %edx,%esi shrdl $11,%r14d,%r14d vpxor %xmm5,%xmm4,%xmm4 xorl %ebx,%r12d xorl %r8d,%esi shrdl $6,%r13d,%r13d vpsrlq $17,%xmm7,%xmm7 addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d vpaddd %xmm4,%xmm3,%xmm3 addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d vpxor %xmm7,%xmm6,%xmm6 shrdl $2,%r14d,%r14d addl %r15d,%ecx movl %r10d,%r13d vpsrlq $2,%xmm7,%xmm7 addl %ecx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ecx vpxor %xmm7,%xmm6,%xmm6 movl %r11d,%r12d xorl %r10d,%r13d shrdl $9,%r14d,%r14d vpshufd $132,%xmm6,%xmm6 xorl %eax,%r12d shrdl $5,%r13d,%r13d xorl %ecx,%r14d vpsrldq $8,%xmm6,%xmm6 andl %r10d,%r12d vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r10d,%r13d addl 56(%rsp),%ebx vpaddd %xmm6,%xmm3,%xmm3 movl %ecx,%r15d shrdl $11,%r14d,%r14d xorl %eax,%r12d vpshufd $80,%xmm3,%xmm7 xorl %edx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%ebx vpsrld $10,%xmm7,%xmm6 andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx vpsrlq $17,%xmm7,%xmm7 xorl %edx,%esi addl %ebx,%r9d shrdl $2,%r14d,%r14d vpxor %xmm7,%xmm6,%xmm6 addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d vpsrlq $2,%xmm7,%xmm7 shrdl $14,%r13d,%r13d movl %r14d,%ebx movl %r10d,%r12d vpxor %xmm7,%xmm6,%xmm6 xorl %r9d,%r13d shrdl $9,%r14d,%r14d xorl %r11d,%r12d vpshufd $232,%xmm6,%xmm6 shrdl $5,%r13d,%r13d xorl %ebx,%r14d andl %r9d,%r12d vpslldq $8,%xmm6,%xmm6 vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r9d,%r13d addl 60(%rsp),%eax movl %ebx,%esi vpaddd %xmm6,%xmm3,%xmm3 shrdl $11,%r14d,%r14d xorl %r11d,%r12d xorl %ecx,%esi vpaddd 96(%rbp),%xmm3,%xmm6 shrdl $6,%r13d,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d 
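
# Annotation (inferred): L$xop_00_47 and L$avx_00_47 each appear to run three
# times, every pass extending the message schedule for the next 16 rounds
# while consuming the staged W+K values and finishing one AES block; a pass
# ends by storing the ciphertext block and advancing the input pointer by 16.
# The "cmpb $0,131(%rbp)" test seems to act as a sentinel: it exits once %rbp
# has walked past the last 128-byte group of round constants (the probed byte
# in the shuffle-mask area is zero), after which the final 16 rounds run
# without schedule computation.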
shrdl $2,%r14d,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d vmovdqa %xmm6,48(%rsp) movq 64+0(%rsp),%r12 vpand %xmm14,%xmm11,%xmm11 movq 64+8(%rsp),%r15 vpor %xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r15,%r12,1) leaq 16(%r12),%r12 cmpb $0,131(%rbp) jne L$avx_00_47 vmovdqu (%r12),%xmm9 movq %r12,64+0(%rsp) shrdl $14,%r13d,%r13d movl %r14d,%eax movl %r9d,%r12d xorl %r8d,%r13d shrdl $9,%r14d,%r14d xorl %r10d,%r12d shrdl $5,%r13d,%r13d xorl %eax,%r14d andl %r8d,%r12d vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r8d,%r13d addl 0(%rsp),%r11d movl %eax,%r15d shrdl $11,%r14d,%r14d xorl %r10d,%r12d xorl %ebx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r11d andl %r15d,%esi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi addl %r11d,%edx shrdl $2,%r14d,%r14d addl %esi,%r11d movl %edx,%r13d addl %r11d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d shrdl $9,%r14d,%r14d xorl %r9d,%r12d shrdl $5,%r13d,%r13d xorl %r11d,%r14d andl %edx,%r12d vpxor %xmm8,%xmm9,%xmm9 xorl %edx,%r13d addl 4(%rsp),%r10d movl %r11d,%esi shrdl $11,%r14d,%r14d xorl %r9d,%r12d xorl %eax,%esi shrdl $6,%r13d,%r13d addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx shrdl $2,%r14d,%r14d addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r10d movl %edx,%r12d xorl %ecx,%r13d shrdl $9,%r14d,%r14d xorl %r8d,%r12d shrdl $5,%r13d,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %ecx,%r13d addl 8(%rsp),%r9d movl %r10d,%r15d shrdl $11,%r14d,%r14d xorl %r8d,%r12d xorl %r11d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx shrdl $2,%r14d,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d shrdl $9,%r14d,%r14d xorl %edx,%r12d shrdl $5,%r13d,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %ebx,%r13d addl 12(%rsp),%r8d movl %r9d,%esi shrdl $11,%r14d,%r14d xorl %edx,%r12d xorl %r10d,%esi shrdl $6,%r13d,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax shrdl $2,%r14d,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r8d movl %ebx,%r12d xorl %eax,%r13d shrdl $9,%r14d,%r14d xorl %ecx,%r12d shrdl $5,%r13d,%r13d xorl %r8d,%r14d andl %eax,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %eax,%r13d addl 16(%rsp),%edx movl %r8d,%r15d shrdl $11,%r14d,%r14d xorl %ecx,%r12d xorl %r9d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%edx andl %r15d,%esi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi addl %edx,%r11d shrdl $2,%r14d,%r14d addl %esi,%edx movl %r11d,%r13d addl %edx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d shrdl $9,%r14d,%r14d xorl %ebx,%r12d shrdl $5,%r13d,%r13d xorl %edx,%r14d andl %r11d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r11d,%r13d addl 20(%rsp),%ecx movl %edx,%esi shrdl $11,%r14d,%r14d xorl %ebx,%r12d xorl %r8d,%esi shrdl $6,%r13d,%r13d addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d shrdl $2,%r14d,%r14d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ecx movl %r11d,%r12d xorl %r10d,%r13d shrdl $9,%r14d,%r14d xorl %eax,%r12d shrdl $5,%r13d,%r13d xorl %ecx,%r14d andl %r10d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl 
%r10d,%r13d addl 24(%rsp),%ebx movl %ecx,%r15d shrdl $11,%r14d,%r14d xorl %eax,%r12d xorl %edx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%ebx andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d shrdl $2,%r14d,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d shrdl $9,%r14d,%r14d xorl %r11d,%r12d shrdl $5,%r13d,%r13d xorl %ebx,%r14d andl %r9d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r9d,%r13d addl 28(%rsp),%eax movl %ebx,%esi shrdl $11,%r14d,%r14d xorl %r11d,%r12d xorl %ecx,%esi shrdl $6,%r13d,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d shrdl $2,%r14d,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d shrdl $14,%r13d,%r13d movl %r14d,%eax movl %r9d,%r12d xorl %r8d,%r13d shrdl $9,%r14d,%r14d xorl %r10d,%r12d shrdl $5,%r13d,%r13d xorl %eax,%r14d andl %r8d,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r8d,%r13d addl 32(%rsp),%r11d movl %eax,%r15d shrdl $11,%r14d,%r14d xorl %r10d,%r12d xorl %ebx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r11d andl %r15d,%esi xorl %eax,%r14d addl %r13d,%r11d xorl %ebx,%esi addl %r11d,%edx shrdl $2,%r14d,%r14d addl %esi,%r11d movl %edx,%r13d addl %r11d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r11d movl %r8d,%r12d xorl %edx,%r13d shrdl $9,%r14d,%r14d xorl %r9d,%r12d shrdl $5,%r13d,%r13d xorl %r11d,%r14d andl %edx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %edx,%r13d addl 36(%rsp),%r10d movl %r11d,%esi shrdl $11,%r14d,%r14d xorl %r9d,%r12d xorl %eax,%esi shrdl $6,%r13d,%r13d addl %r12d,%r10d andl %esi,%r15d xorl %r11d,%r14d addl %r13d,%r10d xorl %eax,%r15d addl %r10d,%ecx shrdl $2,%r14d,%r14d addl %r15d,%r10d movl %ecx,%r13d addl %r10d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r10d movl %edx,%r12d xorl %ecx,%r13d shrdl $9,%r14d,%r14d xorl %r8d,%r12d shrdl $5,%r13d,%r13d xorl %r10d,%r14d andl %ecx,%r12d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %ecx,%r13d addl 40(%rsp),%r9d movl %r10d,%r15d shrdl $11,%r14d,%r14d xorl %r8d,%r12d xorl %r11d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%r9d andl %r15d,%esi xorl %r10d,%r14d addl %r13d,%r9d xorl %r11d,%esi addl %r9d,%ebx shrdl $2,%r14d,%r14d addl %esi,%r9d movl %ebx,%r13d addl %r9d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r9d movl %ecx,%r12d xorl %ebx,%r13d shrdl $9,%r14d,%r14d xorl %edx,%r12d shrdl $5,%r13d,%r13d xorl %r9d,%r14d andl %ebx,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %ebx,%r13d addl 44(%rsp),%r8d movl %r9d,%esi shrdl $11,%r14d,%r14d xorl %edx,%r12d xorl %r10d,%esi shrdl $6,%r13d,%r13d addl %r12d,%r8d andl %esi,%r15d xorl %r9d,%r14d addl %r13d,%r8d xorl %r10d,%r15d addl %r8d,%eax shrdl $2,%r14d,%r14d addl %r15d,%r8d movl %eax,%r13d addl %r8d,%r14d shrdl $14,%r13d,%r13d movl %r14d,%r8d movl %ebx,%r12d xorl %eax,%r13d shrdl $9,%r14d,%r14d xorl %ecx,%r12d shrdl $5,%r13d,%r13d xorl %r8d,%r14d andl %eax,%r12d vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %eax,%r13d addl 48(%rsp),%edx movl %r8d,%r15d shrdl $11,%r14d,%r14d xorl %ecx,%r12d xorl %r9d,%r15d shrdl $6,%r13d,%r13d addl %r12d,%edx andl %r15d,%esi xorl %r8d,%r14d addl %r13d,%edx xorl %r9d,%esi addl %edx,%r11d shrdl $2,%r14d,%r14d addl %esi,%edx movl %r11d,%r13d addl %edx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%edx movl %eax,%r12d xorl %r11d,%r13d shrdl $9,%r14d,%r14d xorl %ebx,%r12d shrdl $5,%r13d,%r13d xorl %edx,%r14d andl 
%r11d,%r12d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r11d,%r13d addl 52(%rsp),%ecx movl %edx,%esi shrdl $11,%r14d,%r14d xorl %ebx,%r12d xorl %r8d,%esi shrdl $6,%r13d,%r13d addl %r12d,%ecx andl %esi,%r15d xorl %edx,%r14d addl %r13d,%ecx xorl %r8d,%r15d addl %ecx,%r10d shrdl $2,%r14d,%r14d addl %r15d,%ecx movl %r10d,%r13d addl %ecx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ecx movl %r11d,%r12d xorl %r10d,%r13d shrdl $9,%r14d,%r14d xorl %eax,%r12d shrdl $5,%r13d,%r13d xorl %ecx,%r14d andl %r10d,%r12d vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r10d,%r13d addl 56(%rsp),%ebx movl %ecx,%r15d shrdl $11,%r14d,%r14d xorl %eax,%r12d xorl %edx,%r15d shrdl $6,%r13d,%r13d addl %r12d,%ebx andl %r15d,%esi xorl %ecx,%r14d addl %r13d,%ebx xorl %edx,%esi addl %ebx,%r9d shrdl $2,%r14d,%r14d addl %esi,%ebx movl %r9d,%r13d addl %ebx,%r14d shrdl $14,%r13d,%r13d movl %r14d,%ebx movl %r10d,%r12d xorl %r9d,%r13d shrdl $9,%r14d,%r14d xorl %r11d,%r12d shrdl $5,%r13d,%r13d xorl %ebx,%r14d andl %r9d,%r12d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r9d,%r13d addl 60(%rsp),%eax movl %ebx,%esi shrdl $11,%r14d,%r14d xorl %r11d,%r12d xorl %ecx,%esi shrdl $6,%r13d,%r13d addl %r12d,%eax andl %esi,%r15d xorl %ebx,%r14d addl %r13d,%eax xorl %ecx,%r15d addl %eax,%r8d shrdl $2,%r14d,%r14d addl %r15d,%eax movl %r8d,%r13d addl %eax,%r14d movq 64+0(%rsp),%r12 movq 64+8(%rsp),%r13 movq 64+40(%rsp),%r15 movq 64+48(%rsp),%rsi vpand %xmm14,%xmm11,%xmm11 movl %r14d,%eax vpor %xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r12,%r13,1) leaq 16(%r12),%r12 addl 0(%r15),%eax addl 4(%r15),%ebx addl 8(%r15),%ecx addl 12(%r15),%edx addl 16(%r15),%r8d addl 20(%r15),%r9d addl 24(%r15),%r10d addl 28(%r15),%r11d cmpq 64+16(%rsp),%r12 movl %eax,0(%r15) movl %ebx,4(%r15) movl %ecx,8(%r15) movl %edx,12(%r15) movl %r8d,16(%r15) movl %r9d,20(%r15) movl %r10d,24(%r15) movl %r11d,28(%r15) jb L$loop_avx movq 64+32(%rsp),%r8 movq 120(%rsp),%rsi vmovdqu %xmm8,(%r8) vzeroall movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp L$epilogue_avx: .byte 0xf3,0xc3 .p2align 6 aesni_cbc_sha256_enc_avx2: L$avx2_shortcut: movq 8(%rsp),%r10 movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 subq $576,%rsp andq $-1024,%rsp addq $448,%rsp shlq $6,%rdx subq %rdi,%rsi subq %rdi,%r10 addq %rdi,%rdx movq %rdx,64+16(%rsp) movq %r8,64+32(%rsp) movq %r9,64+40(%rsp) movq %r10,64+48(%rsp) movq %rax,120(%rsp) L$prologue_avx2: vzeroall movq %rdi,%r13 vpinsrq $1,%rsi,%xmm15,%xmm15 leaq 128(%rcx),%rdi leaq K256+544(%rip),%r12 movl 240-128(%rdi),%r14d movq %r9,%r15 movq %r10,%rsi vmovdqu (%r8),%xmm8 leaq -9(%r14),%r14 vmovdqa 0(%r12,%r14,8),%xmm14 vmovdqa 16(%r12,%r14,8),%xmm13 vmovdqa 32(%r12,%r14,8),%xmm12 subq $-64,%r13 movl 0(%r15),%eax leaq (%rsi,%r13,1),%r12 movl 4(%r15),%ebx cmpq %rdx,%r13 movl 8(%r15),%ecx cmoveq %rsp,%r12 movl 12(%r15),%edx movl 16(%r15),%r8d movl 20(%r15),%r9d movl 24(%r15),%r10d movl 28(%r15),%r11d vmovdqu 0-128(%rdi),%xmm10 jmp L$oop_avx2 .p2align 4 L$oop_avx2: vmovdqa K256+512(%rip),%ymm7 vmovdqu -64+0(%rsi,%r13,1),%xmm0 vmovdqu -64+16(%rsi,%r13,1),%xmm1 vmovdqu -64+32(%rsi,%r13,1),%xmm2 vmovdqu -64+48(%rsi,%r13,1),%xmm3 vinserti128 $1,(%r12),%ymm0,%ymm0 vinserti128 $1,16(%r12),%ymm1,%ymm1 vpshufb %ymm7,%ymm0,%ymm0 vinserti128 $1,32(%r12),%ymm2,%ymm2 vpshufb %ymm7,%ymm1,%ymm1 vinserti128 $1,48(%r12),%ymm3,%ymm3 leaq 
K256(%rip),%rbp vpshufb %ymm7,%ymm2,%ymm2 leaq -64(%r13),%r13 vpaddd 0(%rbp),%ymm0,%ymm4 vpshufb %ymm7,%ymm3,%ymm3 vpaddd 32(%rbp),%ymm1,%ymm5 vpaddd 64(%rbp),%ymm2,%ymm6 vpaddd 96(%rbp),%ymm3,%ymm7 vmovdqa %ymm4,0(%rsp) xorl %r14d,%r14d vmovdqa %ymm5,32(%rsp) leaq -64(%rsp),%rsp movl %ebx,%esi vmovdqa %ymm6,0(%rsp) xorl %ecx,%esi vmovdqa %ymm7,32(%rsp) movl %r9d,%r12d subq $-32*4,%rbp jmp L$avx2_00_47 .p2align 4 L$avx2_00_47: vmovdqu (%r13),%xmm9 vpinsrq $0,%r13,%xmm15,%xmm15 leaq -64(%rsp),%rsp vpalignr $4,%ymm0,%ymm1,%ymm4 addl 0+128(%rsp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d vpalignr $4,%ymm2,%ymm3,%ymm7 rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d vpsrld $7,%ymm4,%ymm6 andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d vpaddd %ymm7,%ymm0,%ymm0 leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d vpsrld $3,%ymm4,%ymm7 rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d vpslld $14,%ymm4,%ymm5 rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx vpxor %ymm6,%ymm7,%ymm4 andl %r15d,%esi vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi vpshufd $250,%ymm3,%ymm7 xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d vpsrld $11,%ymm6,%ymm6 addl 4+128(%rsp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d vpxor %ymm5,%ymm4,%ymm4 rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d vpslld $11,%ymm5,%ymm5 andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d vpxor %ymm6,%ymm4,%ymm4 leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi vpsrld $10,%ymm7,%ymm6 rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi vpxor %ymm5,%ymm4,%ymm4 rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx vpsrlq $17,%ymm7,%ymm7 andl %esi,%r15d vpxor %xmm8,%xmm9,%xmm9 xorl %r12d,%r14d xorl %eax,%r15d vpaddd %ymm4,%ymm0,%ymm0 xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d vpxor %ymm7,%ymm6,%ymm6 addl 8+128(%rsp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d vpsrlq $2,%ymm7,%ymm7 rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d vpxor %ymm7,%ymm6,%ymm6 andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d vpshufd $132,%ymm6,%ymm6 leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d vpsrldq $8,%ymm6,%ymm6 rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d vpaddd %ymm6,%ymm0,%ymm0 rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx vpshufd $80,%ymm0,%ymm7 andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi vpsrld $10,%ymm7,%ymm6 xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d vpsrlq $17,%ymm7,%ymm7 addl 12+128(%rsp),%r8d andl %ebx,%r12d rorxl $25,%ebx,%r13d vpxor %ymm7,%ymm6,%ymm6 rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d vpsrlq $2,%ymm7,%ymm7 andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d vpxor %ymm7,%ymm6,%ymm6 leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi vpshufd $232,%ymm6,%ymm6 rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi vpslldq $8,%ymm6,%ymm6 rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax vpaddd %ymm6,%ymm0,%ymm0 andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d vpaddd 0(%rbp),%ymm0,%ymm6 xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d vmovdqa %ymm6,0(%rsp) vpalignr $4,%ymm1,%ymm2,%ymm4 addl 32+128(%rsp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d vpalignr $4,%ymm3,%ymm0,%ymm7 rorxl $11,%eax,%r15d leal 
(%r8,%r14,1),%r8d leal (%rdx,%r12,1),%edx vpsrld $7,%ymm4,%ymm6 andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d vpaddd %ymm7,%ymm1,%ymm1 leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d vpsrld $3,%ymm4,%ymm7 rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d vpslld $14,%ymm4,%ymm5 rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d vpxor %ymm6,%ymm7,%ymm4 andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi vpshufd $250,%ymm0,%ymm7 xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d vpsrld $11,%ymm6,%ymm6 addl 36+128(%rsp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d vpxor %ymm5,%ymm4,%ymm4 rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx vpslld $11,%ymm5,%ymm5 andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d vpxor %ymm6,%ymm4,%ymm4 leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi vpsrld $10,%ymm7,%ymm6 rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi vpxor %ymm5,%ymm4,%ymm4 rorxl $13,%edx,%r14d rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d vpsrlq $17,%ymm7,%ymm7 andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d vpaddd %ymm4,%ymm1,%ymm1 xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d vpxor %ymm7,%ymm6,%ymm6 addl 40+128(%rsp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d vpsrlq $2,%ymm7,%ymm7 rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx vpxor %ymm7,%ymm6,%ymm6 andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d vpshufd $132,%ymm6,%ymm6 leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d vpsrldq $8,%ymm6,%ymm6 rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d vpaddd %ymm6,%ymm1,%ymm1 rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d vpshufd $80,%ymm1,%ymm7 andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi vpsrld $10,%ymm7,%ymm6 xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d vpsrlq $17,%ymm7,%ymm7 addl 44+128(%rsp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d vpxor %ymm7,%ymm6,%ymm6 rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax vpsrlq $2,%ymm7,%ymm7 andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d vpxor %ymm7,%ymm6,%ymm6 leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi vpshufd $232,%ymm6,%ymm6 rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi vpslldq $8,%ymm6,%ymm6 rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d vpaddd %ymm6,%ymm1,%ymm1 andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d vpaddd 32(%rbp),%ymm1,%ymm6 xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d vmovdqa %ymm6,32(%rsp) leaq -64(%rsp),%rsp vpalignr $4,%ymm2,%ymm3,%ymm4 addl 0+128(%rsp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d vpalignr $4,%ymm0,%ymm1,%ymm7 rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d vpsrld $7,%ymm4,%ymm6 andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d vpaddd %ymm7,%ymm2,%ymm2 leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d vpsrld $3,%ymm4,%ymm7 rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d vpslld $14,%ymm4,%ymm5 rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx vpxor %ymm6,%ymm7,%ymm4 andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi vpshufd $250,%ymm1,%ymm7 xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d vpsrld 
$11,%ymm6,%ymm6 addl 4+128(%rsp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d vpxor %ymm5,%ymm4,%ymm4 rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d vpslld $11,%ymm5,%ymm5 andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d vpxor %ymm6,%ymm4,%ymm4 leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi vpsrld $10,%ymm7,%ymm6 rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi vpxor %ymm5,%ymm4,%ymm4 rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx vpsrlq $17,%ymm7,%ymm7 andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %eax,%r15d vpaddd %ymm4,%ymm2,%ymm2 xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d vpxor %ymm7,%ymm6,%ymm6 addl 8+128(%rsp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d vpsrlq $2,%ymm7,%ymm7 rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d vpxor %ymm7,%ymm6,%ymm6 andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d vpshufd $132,%ymm6,%ymm6 leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d vpsrldq $8,%ymm6,%ymm6 rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d vpaddd %ymm6,%ymm2,%ymm2 rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx vpshufd $80,%ymm2,%ymm7 andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi vpsrld $10,%ymm7,%ymm6 xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d vpsrlq $17,%ymm7,%ymm7 addl 12+128(%rsp),%r8d andl %ebx,%r12d rorxl $25,%ebx,%r13d vpxor %ymm7,%ymm6,%ymm6 rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d vpsrlq $2,%ymm7,%ymm7 andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d vpxor %ymm7,%ymm6,%ymm6 leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi vpshufd $232,%ymm6,%ymm6 rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi vpslldq $8,%ymm6,%ymm6 rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax vpaddd %ymm6,%ymm2,%ymm2 andl %esi,%r15d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d vpaddd 64(%rbp),%ymm2,%ymm6 xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d vmovdqa %ymm6,0(%rsp) vpalignr $4,%ymm3,%ymm0,%ymm4 addl 32+128(%rsp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d vpalignr $4,%ymm1,%ymm2,%ymm7 rorxl $11,%eax,%r15d leal (%r8,%r14,1),%r8d leal (%rdx,%r12,1),%edx vpsrld $7,%ymm4,%ymm6 andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d vpaddd %ymm7,%ymm3,%ymm3 leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d vpsrld $3,%ymm4,%ymm7 rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d vpslld $14,%ymm4,%ymm5 rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d vpxor %ymm6,%ymm7,%ymm4 andl %r15d,%esi vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi vpshufd $250,%ymm2,%ymm7 xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d vpsrld $11,%ymm6,%ymm6 addl 36+128(%rsp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d vpxor %ymm5,%ymm4,%ymm4 rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx vpslld $11,%ymm5,%ymm5 andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d vpxor %ymm6,%ymm4,%ymm4 leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi vpsrld $10,%ymm7,%ymm6 rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi vpxor %ymm5,%ymm4,%ymm4 rorxl $13,%edx,%r14d rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d vpsrlq $17,%ymm7,%ymm7 andl %esi,%r15d vaesenclast 
%xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d vpaddd %ymm4,%ymm3,%ymm3 xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d vpxor %ymm7,%ymm6,%ymm6 addl 40+128(%rsp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d vpsrlq $2,%ymm7,%ymm7 rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx vpxor %ymm7,%ymm6,%ymm6 andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d vpshufd $132,%ymm6,%ymm6 leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d vpsrldq $8,%ymm6,%ymm6 rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d vpaddd %ymm6,%ymm3,%ymm3 rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d vpshufd $80,%ymm3,%ymm7 andl %r15d,%esi vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi vpsrld $10,%ymm7,%ymm6 xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d vpsrlq $17,%ymm7,%ymm7 addl 44+128(%rsp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d vpxor %ymm7,%ymm6,%ymm6 rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax vpsrlq $2,%ymm7,%ymm7 andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d vpxor %ymm7,%ymm6,%ymm6 leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi vpshufd $232,%ymm6,%ymm6 rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi vpslldq $8,%ymm6,%ymm6 rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d vpaddd %ymm6,%ymm3,%ymm3 andl %esi,%r15d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d vpaddd 96(%rbp),%ymm3,%ymm6 xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d vmovdqa %ymm6,32(%rsp) vmovq %xmm15,%r13 vpextrq $1,%xmm15,%r15 vpand %xmm14,%xmm11,%xmm11 vpor %xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r15,%r13,1) leaq 16(%r13),%r13 leaq 128(%rbp),%rbp cmpb $0,3(%rbp) jne L$avx2_00_47 vmovdqu (%r13),%xmm9 vpinsrq $0,%r13,%xmm15,%xmm15 addl 0+64(%rsp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx andl %r15d,%esi vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d addl 4+64(%rsp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx andl %esi,%r15d vpxor %xmm8,%xmm9,%xmm9 xorl %r12d,%r14d xorl %eax,%r15d xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d addl 8+64(%rsp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d addl 12+64(%rsp),%r8d 
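
# Annotation (inferred): the AVX2/BMI2 path (aesni_cbc_sha256_enc_avx2,
# reached via L$avx2_shortcut) widens the message schedule to 256-bit %ymm
# registers. vinserti128 pairs each 64-byte block with the following one,
# with a dummy source at %rsp substituted by cmove when fewer than two blocks
# remain, so one pass of L$avx2_00_47 advances the schedule for two blocks at
# once. The scalar rounds use the BMI2 rorx/andn forms, the block and output
# pointers appear to be parked in %xmm15 (vpinsrq/vmovq/vpextrq) rather than
# on the stack, and the AES-CBC lane is interleaved in the same way as in the
# XOP and AVX paths.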
andl %ebx,%r12d rorxl $25,%ebx,%r13d rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d addl 32+64(%rsp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d rorxl $11,%eax,%r15d leal (%r8,%r14,1),%r8d leal (%rdx,%r12,1),%edx andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d addl 36+64(%rsp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi rorxl $13,%edx,%r14d rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d addl 40+64(%rsp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d addl 44+64(%rsp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d addl 0(%rsp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d addl 4(%rsp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx andl %esi,%r15d 
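# In the rounds below, vaesenclast results are captured speculatively into
# %xmm11 at the points where a 10-, 12- or 14-round key schedule would end
# (schedule offsets 160/192/224) and blended with vpand/vpor against the
# %xmm12/%xmm13/%xmm14 masks selected earlier from the key length, so one code
# path serves AES-128/192/256. The finished ciphertext block is then written
# out through the in/out offsets kept packed in %xmm15.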
vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %eax,%r15d xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d addl 8(%rsp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d addl 12(%rsp),%r8d andl %ebx,%r12d rorxl $25,%ebx,%r13d rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax andl %esi,%r15d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d addl 32(%rsp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d rorxl $11,%eax,%r15d leal (%r8,%r14,1),%r8d leal (%rdx,%r12,1),%edx andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d andl %r15d,%esi vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d addl 36(%rsp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi rorxl $13,%edx,%r14d rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d andl %esi,%r15d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d addl 40(%rsp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d andl %r15d,%esi vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d addl 44(%rsp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d andl %esi,%r15d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d vpextrq $1,%xmm15,%r12 vmovq %xmm15,%r13 movq 552(%rsp),%r15 addl %r14d,%eax leaq 
448(%rsp),%rbp vpand %xmm14,%xmm11,%xmm11 vpor %xmm11,%xmm8,%xmm8 vmovdqu %xmm8,(%r12,%r13,1) leaq 16(%r13),%r13 addl 0(%r15),%eax addl 4(%r15),%ebx addl 8(%r15),%ecx addl 12(%r15),%edx addl 16(%r15),%r8d addl 20(%r15),%r9d addl 24(%r15),%r10d addl 28(%r15),%r11d movl %eax,0(%r15) movl %ebx,4(%r15) movl %ecx,8(%r15) movl %edx,12(%r15) movl %r8d,16(%r15) movl %r9d,20(%r15) movl %r10d,24(%r15) movl %r11d,28(%r15) cmpq 80(%rbp),%r13 je L$done_avx2 xorl %r14d,%r14d movl %ebx,%esi movl %r9d,%r12d xorl %ecx,%esi jmp L$ower_avx2 .p2align 4 L$ower_avx2: vmovdqu (%r13),%xmm9 vpinsrq $0,%r13,%xmm15,%xmm15 addl 0+16(%rbp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx andl %r15d,%esi vpxor %xmm10,%xmm9,%xmm9 vmovdqu 16-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d addl 4+16(%rbp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx andl %esi,%r15d vpxor %xmm8,%xmm9,%xmm9 xorl %r12d,%r14d xorl %eax,%r15d xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d addl 8+16(%rbp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 32-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d addl 12+16(%rbp),%r8d andl %ebx,%r12d rorxl $25,%ebx,%r13d rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 48-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d addl 32+16(%rbp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d rorxl $11,%eax,%r15d leal (%r8,%r14,1),%r8d leal (%rdx,%r12,1),%edx andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 64-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d addl 36+16(%rbp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi rorxl $13,%edx,%r14d 
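# (inside L$ower_avx2) This loop replays the 64 compression rounds for the
# second SHA-256 block of each 128-byte AVX2 iteration: the +16(%rbp) operands
# appear to pick the second block's 128-bit lane out of the W[t]+K[t] values
# saved on the stack during the first pass, with %rbp walking back down the
# save area, while one further 16-byte CBC block is encrypted and stored per
# 16 rounds.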
rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 80-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d addl 40+16(%rbp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 96-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d addl 44+16(%rbp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 112-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d leaq -64(%rbp),%rbp addl 0+16(%rbp),%r11d andl %r8d,%r12d rorxl $25,%r8d,%r13d rorxl $11,%r8d,%r15d leal (%rax,%r14,1),%eax leal (%r11,%r12,1),%r11d andnl %r10d,%r8d,%r12d xorl %r15d,%r13d rorxl $6,%r8d,%r14d leal (%r11,%r12,1),%r11d xorl %r14d,%r13d movl %eax,%r15d rorxl $22,%eax,%r12d leal (%r11,%r13,1),%r11d xorl %ebx,%r15d rorxl $13,%eax,%r14d rorxl $2,%eax,%r13d leal (%rdx,%r11,1),%edx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 128-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ebx,%esi xorl %r13d,%r14d leal (%r11,%rsi,1),%r11d movl %r8d,%r12d addl 4+16(%rbp),%r10d andl %edx,%r12d rorxl $25,%edx,%r13d rorxl $11,%edx,%esi leal (%r11,%r14,1),%r11d leal (%r10,%r12,1),%r10d andnl %r9d,%edx,%r12d xorl %esi,%r13d rorxl $6,%edx,%r14d leal (%r10,%r12,1),%r10d xorl %r14d,%r13d movl %r11d,%esi rorxl $22,%r11d,%r12d leal (%r10,%r13,1),%r10d xorl %eax,%esi rorxl $13,%r11d,%r14d rorxl $2,%r11d,%r13d leal (%rcx,%r10,1),%ecx andl %esi,%r15d vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 144-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %eax,%r15d xorl %r13d,%r14d leal (%r10,%r15,1),%r10d movl %edx,%r12d addl 8+16(%rbp),%r9d andl %ecx,%r12d rorxl $25,%ecx,%r13d rorxl $11,%ecx,%r15d leal (%r10,%r14,1),%r10d leal (%r9,%r12,1),%r9d andnl %r8d,%ecx,%r12d xorl %r15d,%r13d rorxl $6,%ecx,%r14d leal (%r9,%r12,1),%r9d xorl %r14d,%r13d movl %r10d,%r15d rorxl $22,%r10d,%r12d leal (%r9,%r13,1),%r9d xorl %r11d,%r15d rorxl $13,%r10d,%r14d rorxl $2,%r10d,%r13d leal (%rbx,%r9,1),%ebx andl %r15d,%esi vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 160-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r11d,%esi xorl %r13d,%r14d leal (%r9,%rsi,1),%r9d movl %ecx,%r12d addl 12+16(%rbp),%r8d andl %ebx,%r12d rorxl $25,%ebx,%r13d rorxl $11,%ebx,%esi leal (%r9,%r14,1),%r9d leal (%r8,%r12,1),%r8d andnl %edx,%ebx,%r12d xorl %esi,%r13d rorxl $6,%ebx,%r14d leal (%r8,%r12,1),%r8d xorl %r14d,%r13d movl %r9d,%esi rorxl $22,%r9d,%r12d leal (%r8,%r13,1),%r8d xorl %r10d,%esi rorxl $13,%r9d,%r14d rorxl $2,%r9d,%r13d leal (%rax,%r8,1),%eax andl %esi,%r15d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 176-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r10d,%r15d xorl %r13d,%r14d leal (%r8,%r15,1),%r8d movl %ebx,%r12d addl 32+16(%rbp),%edx andl %eax,%r12d rorxl $25,%eax,%r13d rorxl $11,%eax,%r15d leal (%r8,%r14,1),%r8d 
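# When %rbp reaches %rsp this loop falls out: the working variables are folded
# back into the hash state at (%r15), %r13 is compared with the end-of-input
# mark at 64+16(%rsp) to decide whether another 128-byte iteration runs
# (jbe L$oop_avx2), and L$done_avx2 writes the final IV back, runs vzeroall and
# restores the callee-saved registers. The aesni_cbc_sha256_enc_shaext entry
# that follows is the SHA-NI path: it loads the SHA-256 state from (%r9),
# shuffles it with pshufd/palignr into the ABEF/CDGH layout used by
# sha256rnds2, and picks up the IV from (%r8) and the AES key schedule
# (round count at 240(%rcx)) from (%rcx).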
leal (%rdx,%r12,1),%edx andnl %ecx,%eax,%r12d xorl %r15d,%r13d rorxl $6,%eax,%r14d leal (%rdx,%r12,1),%edx xorl %r14d,%r13d movl %r8d,%r15d rorxl $22,%r8d,%r12d leal (%rdx,%r13,1),%edx xorl %r9d,%r15d rorxl $13,%r8d,%r14d rorxl $2,%r8d,%r13d leal (%r11,%rdx,1),%r11d andl %r15d,%esi vpand %xmm12,%xmm11,%xmm8 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 192-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r9d,%esi xorl %r13d,%r14d leal (%rdx,%rsi,1),%edx movl %eax,%r12d addl 36+16(%rbp),%ecx andl %r11d,%r12d rorxl $25,%r11d,%r13d rorxl $11,%r11d,%esi leal (%rdx,%r14,1),%edx leal (%rcx,%r12,1),%ecx andnl %ebx,%r11d,%r12d xorl %esi,%r13d rorxl $6,%r11d,%r14d leal (%rcx,%r12,1),%ecx xorl %r14d,%r13d movl %edx,%esi rorxl $22,%edx,%r12d leal (%rcx,%r13,1),%ecx xorl %r8d,%esi rorxl $13,%edx,%r14d rorxl $2,%edx,%r13d leal (%r10,%rcx,1),%r10d andl %esi,%r15d vaesenclast %xmm10,%xmm9,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 208-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %r8d,%r15d xorl %r13d,%r14d leal (%rcx,%r15,1),%ecx movl %r11d,%r12d addl 40+16(%rbp),%ebx andl %r10d,%r12d rorxl $25,%r10d,%r13d rorxl $11,%r10d,%r15d leal (%rcx,%r14,1),%ecx leal (%rbx,%r12,1),%ebx andnl %eax,%r10d,%r12d xorl %r15d,%r13d rorxl $6,%r10d,%r14d leal (%rbx,%r12,1),%ebx xorl %r14d,%r13d movl %ecx,%r15d rorxl $22,%ecx,%r12d leal (%rbx,%r13,1),%ebx xorl %edx,%r15d rorxl $13,%ecx,%r14d rorxl $2,%ecx,%r13d leal (%r9,%rbx,1),%r9d andl %r15d,%esi vpand %xmm13,%xmm11,%xmm11 vaesenc %xmm10,%xmm9,%xmm9 vmovdqu 224-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %edx,%esi xorl %r13d,%r14d leal (%rbx,%rsi,1),%ebx movl %r10d,%r12d addl 44+16(%rbp),%eax andl %r9d,%r12d rorxl $25,%r9d,%r13d rorxl $11,%r9d,%esi leal (%rbx,%r14,1),%ebx leal (%rax,%r12,1),%eax andnl %r11d,%r9d,%r12d xorl %esi,%r13d rorxl $6,%r9d,%r14d leal (%rax,%r12,1),%eax xorl %r14d,%r13d movl %ebx,%esi rorxl $22,%ebx,%r12d leal (%rax,%r13,1),%eax xorl %ecx,%esi rorxl $13,%ebx,%r14d rorxl $2,%ebx,%r13d leal (%r8,%rax,1),%r8d andl %esi,%r15d vpor %xmm11,%xmm8,%xmm8 vaesenclast %xmm10,%xmm9,%xmm11 vmovdqu 0-128(%rdi),%xmm10 xorl %r12d,%r14d xorl %ecx,%r15d xorl %r13d,%r14d leal (%rax,%r15,1),%eax movl %r9d,%r12d vmovq %xmm15,%r13 vpextrq $1,%xmm15,%r15 vpand %xmm14,%xmm11,%xmm11 vpor %xmm11,%xmm8,%xmm8 leaq -64(%rbp),%rbp vmovdqu %xmm8,(%r15,%r13,1) leaq 16(%r13),%r13 cmpq %rsp,%rbp jae L$ower_avx2 movq 552(%rsp),%r15 leaq 64(%r13),%r13 movq 560(%rsp),%rsi addl %r14d,%eax leaq 448(%rsp),%rsp addl 0(%r15),%eax addl 4(%r15),%ebx addl 8(%r15),%ecx addl 12(%r15),%edx addl 16(%r15),%r8d addl 20(%r15),%r9d addl 24(%r15),%r10d leaq (%rsi,%r13,1),%r12 addl 28(%r15),%r11d cmpq 64+16(%rsp),%r13 movl %eax,0(%r15) cmoveq %rsp,%r12 movl %ebx,4(%r15) movl %ecx,8(%r15) movl %edx,12(%r15) movl %r8d,16(%r15) movl %r9d,20(%r15) movl %r10d,24(%r15) movl %r11d,28(%r15) jbe L$oop_avx2 leaq (%rsp),%rbp L$done_avx2: leaq (%rbp),%rsp movq 64+32(%rsp),%r8 movq 120(%rsp),%rsi vmovdqu %xmm8,(%r8) vzeroall movq -48(%rsi),%r15 movq -40(%rsi),%r14 movq -32(%rsi),%r13 movq -24(%rsi),%r12 movq -16(%rsi),%rbp movq -8(%rsi),%rbx leaq (%rsi),%rsp L$epilogue_avx2: .byte 0xf3,0xc3 .p2align 5 aesni_cbc_sha256_enc_shaext: movq 8(%rsp),%r10 leaq K256+128(%rip),%rax movdqu (%r9),%xmm1 movdqu 16(%r9),%xmm2 movdqa 512-128(%rax),%xmm3 movl 240(%rcx),%r11d subq %rdi,%rsi movups (%rcx),%xmm15 movups (%r8),%xmm6 movups 16(%rcx),%xmm4 leaq 112(%rcx),%rcx pshufd $0x1b,%xmm1,%xmm0 pshufd $0xb1,%xmm1,%xmm1 pshufd $0x1b,%xmm2,%xmm2 movdqa %xmm3,%xmm7 .byte 102,15,58,15,202,8 punpcklqdq %xmm0,%xmm2 jmp L$oop_shaext .p2align 4 L$oop_shaext: movdqu 
(%r10),%xmm10 movdqu 16(%r10),%xmm11 movdqu 32(%r10),%xmm12 .byte 102,68,15,56,0,211 movdqu 48(%r10),%xmm13 movdqa 0-128(%rax),%xmm0 paddd %xmm10,%xmm0 .byte 102,68,15,56,0,219 movdqa %xmm2,%xmm9 movdqa %xmm1,%xmm8 movups 0(%rdi),%xmm14 xorps %xmm15,%xmm14 xorps %xmm14,%xmm6 movups -80(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movups -64(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,202 movdqa 32-128(%rax),%xmm0 paddd %xmm11,%xmm0 .byte 102,68,15,56,0,227 leaq 64(%r10),%r10 movups -48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movups -32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,202 movdqa 64-128(%rax),%xmm0 paddd %xmm12,%xmm0 .byte 102,68,15,56,0,235 .byte 69,15,56,204,211 movups -16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm13,%xmm3 .byte 102,65,15,58,15,220,4 paddd %xmm3,%xmm10 movups 0(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,202 movdqa 96-128(%rax),%xmm0 paddd %xmm13,%xmm0 .byte 69,15,56,205,213 .byte 69,15,56,204,220 movups 16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movups 32(%rcx),%xmm4 aesenc %xmm5,%xmm6 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,221,4 paddd %xmm3,%xmm11 .byte 15,56,203,202 movdqa 128-128(%rax),%xmm0 paddd %xmm10,%xmm0 .byte 69,15,56,205,218 .byte 69,15,56,204,229 movups 48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 paddd %xmm3,%xmm12 cmpl $11,%r11d jb L$aesenclast1 movups 64(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 80(%rcx),%xmm5 aesenc %xmm4,%xmm6 je L$aesenclast1 movups 96(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 112(%rcx),%xmm5 aesenc %xmm4,%xmm6 L$aesenclast1: aesenclast %xmm5,%xmm6 movups 16-112(%rcx),%xmm4 nop .byte 15,56,203,202 movups 16(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm6,0(%rsi,%rdi,1) xorps %xmm14,%xmm6 movups -80(%rcx),%xmm5 aesenc %xmm4,%xmm6 movdqa 160-128(%rax),%xmm0 paddd %xmm11,%xmm0 .byte 69,15,56,205,227 .byte 69,15,56,204,234 movups -64(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm12,%xmm3 .byte 102,65,15,58,15,219,4 paddd %xmm3,%xmm13 movups -48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 192-128(%rax),%xmm0 paddd %xmm12,%xmm0 .byte 69,15,56,205,236 .byte 69,15,56,204,211 movups -32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm13,%xmm3 .byte 102,65,15,58,15,220,4 paddd %xmm3,%xmm10 movups -16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 224-128(%rax),%xmm0 paddd %xmm13,%xmm0 .byte 69,15,56,205,213 .byte 69,15,56,204,220 movups 0(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,221,4 paddd %xmm3,%xmm11 movups 16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 256-128(%rax),%xmm0 paddd %xmm10,%xmm0 .byte 69,15,56,205,218 .byte 69,15,56,204,229 movups 32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 paddd %xmm3,%xmm12 movups 48(%rcx),%xmm5 aesenc %xmm4,%xmm6 cmpl $11,%r11d jb L$aesenclast2 movups 64(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 80(%rcx),%xmm5 aesenc %xmm4,%xmm6 je L$aesenclast2 movups 96(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 112(%rcx),%xmm5 aesenc %xmm4,%xmm6 L$aesenclast2: aesenclast %xmm5,%xmm6 movups 16-112(%rcx),%xmm4 nop .byte 15,56,203,202 movups 32(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm6,16(%rsi,%rdi,1) xorps %xmm14,%xmm6 
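# Key to the .byte sequences in this SHA-NI loop (emitted as raw bytes,
# presumably for assemblers without the mnemonics): 15,56,203,xx = sha256rnds2;
# 69,15,56,204,xx = sha256msg1; 69,15,56,205,xx = sha256msg2; 102,68,15,56,0,xx
# = pshufb with the byte-swap mask from 512-128(%rax); 102,65,15,58,15,xx,4 =
# palignr $4. Each 16-byte CBC block is encrypted alongside four groups of
# round constants, the cmpl $11,%r11d ladders (L$aesenclastN) trim the aesenc
# count to the key-schedule length, and ciphertext is stored through
# (%rsi,%rdi,1), the output pointer being held relative to the input.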
movups -80(%rcx),%xmm5 aesenc %xmm4,%xmm6 movdqa 288-128(%rax),%xmm0 paddd %xmm11,%xmm0 .byte 69,15,56,205,227 .byte 69,15,56,204,234 movups -64(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm12,%xmm3 .byte 102,65,15,58,15,219,4 paddd %xmm3,%xmm13 movups -48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 320-128(%rax),%xmm0 paddd %xmm12,%xmm0 .byte 69,15,56,205,236 .byte 69,15,56,204,211 movups -32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm13,%xmm3 .byte 102,65,15,58,15,220,4 paddd %xmm3,%xmm10 movups -16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 352-128(%rax),%xmm0 paddd %xmm13,%xmm0 .byte 69,15,56,205,213 .byte 69,15,56,204,220 movups 0(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm10,%xmm3 .byte 102,65,15,58,15,221,4 paddd %xmm3,%xmm11 movups 16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 384-128(%rax),%xmm0 paddd %xmm10,%xmm0 .byte 69,15,56,205,218 .byte 69,15,56,204,229 movups 32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm11,%xmm3 .byte 102,65,15,58,15,218,4 paddd %xmm3,%xmm12 movups 48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movdqa 416-128(%rax),%xmm0 paddd %xmm11,%xmm0 .byte 69,15,56,205,227 .byte 69,15,56,204,234 cmpl $11,%r11d jb L$aesenclast3 movups 64(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 80(%rcx),%xmm5 aesenc %xmm4,%xmm6 je L$aesenclast3 movups 96(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 112(%rcx),%xmm5 aesenc %xmm4,%xmm6 L$aesenclast3: aesenclast %xmm5,%xmm6 movups 16-112(%rcx),%xmm4 nop .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movdqa %xmm12,%xmm3 .byte 102,65,15,58,15,219,4 paddd %xmm3,%xmm13 movups 48(%rdi),%xmm14 xorps %xmm15,%xmm14 movups %xmm6,32(%rsi,%rdi,1) xorps %xmm14,%xmm6 movups -80(%rcx),%xmm5 aesenc %xmm4,%xmm6 movups -64(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,202 movdqa 448-128(%rax),%xmm0 paddd %xmm12,%xmm0 .byte 69,15,56,205,236 movdqa %xmm7,%xmm3 movups -48(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movups -32(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,202 movdqa 480-128(%rax),%xmm0 paddd %xmm13,%xmm0 movups -16(%rcx),%xmm5 aesenc %xmm4,%xmm6 movups 0(%rcx),%xmm4 aesenc %xmm5,%xmm6 .byte 15,56,203,209 pshufd $0x0e,%xmm0,%xmm0 movups 16(%rcx),%xmm5 aesenc %xmm4,%xmm6 .byte 15,56,203,202 movups 32(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 48(%rcx),%xmm5 aesenc %xmm4,%xmm6 cmpl $11,%r11d jb L$aesenclast4 movups 64(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 80(%rcx),%xmm5 aesenc %xmm4,%xmm6 je L$aesenclast4 movups 96(%rcx),%xmm4 aesenc %xmm5,%xmm6 movups 112(%rcx),%xmm5 aesenc %xmm4,%xmm6 L$aesenclast4: aesenclast %xmm5,%xmm6 movups 16-112(%rcx),%xmm4 nop paddd %xmm9,%xmm2 paddd %xmm8,%xmm1 decq %rdx movups %xmm6,48(%rsi,%rdi,1) leaq 64(%rdi),%rdi jnz L$oop_shaext pshufd $0xb1,%xmm2,%xmm2 pshufd $0x1b,%xmm1,%xmm3 pshufd $0xb1,%xmm1,%xmm1 punpckhqdq %xmm2,%xmm1 .byte 102,15,58,15,211,8 movups %xmm6,(%r8) movdqu %xmm1,(%r9) movdqu %xmm2,16(%r9) .byte 0xf3,0xc3
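# End of aesni_cbc_sha256_enc_shaext: the updated SHA-256 state is shuffled
# back into word order and stored to (%r9)/16(%r9), the final IV to (%r8), and
# the closing .byte 0xf3,0xc3 encodes "rep ret".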