summaryrefslogtreecommitdiff
path: root/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm')
-rw-r--r--deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm4994
1 files changed, 4994 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
new file mode 100644
index 0000000000..0f4790eead
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
@@ -0,0 +1,4994 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+EXTERN OPENSSL_ia32cap_P
+global aesni_encrypt
+
+ALIGN 16
+aesni_encrypt:
+ movups xmm2,XMMWORD[rcx]
+ mov eax,DWORD[240+r8]
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[16+r8]
+ lea r8,[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_1:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[r8]
+ lea r8,[16+r8]
+ jnz NEAR $L$oop_enc1_1
+DB 102,15,56,221,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movups XMMWORD[rdx],xmm2
+ pxor xmm2,xmm2
+ DB 0F3h,0C3h ;repret
+
+
+global aesni_decrypt
+
+ALIGN 16
+aesni_decrypt:
+ movups xmm2,XMMWORD[rcx]
+ mov eax,DWORD[240+r8]
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[16+r8]
+ lea r8,[32+r8]
+ xorps xmm2,xmm0
+$L$oop_dec1_2:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[r8]
+ lea r8,[16+r8]
+ jnz NEAR $L$oop_dec1_2
+DB 102,15,56,223,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movups XMMWORD[rdx],xmm2
+ pxor xmm2,xmm2
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_encrypt2:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$enc_loop2:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$enc_loop2
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_decrypt2:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$dec_loop2:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$dec_loop2
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_encrypt3:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$enc_loop3:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$enc_loop3
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_decrypt3:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+ add rax,16
+
+$L$dec_loop3:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$dec_loop3
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_encrypt4:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 0x0f,0x1f,0x00
+ add rax,16
+
+$L$enc_loop4:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$enc_loop4
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_decrypt4:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD[32+rcx]
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 0x0f,0x1f,0x00
+ add rax,16
+
+$L$dec_loop4:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$dec_loop4
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_encrypt6:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+DB 102,15,56,220,209
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,217
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+DB 102,15,56,220,225
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD[rax*1+rcx]
+ add rax,16
+ jmp NEAR $L$enc_loop6_enter
+ALIGN 16
+$L$enc_loop6:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+$L$enc_loop6_enter:
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$enc_loop6
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+DB 102,15,56,221,240
+DB 102,15,56,221,248
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_decrypt6:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+DB 102,15,56,222,209
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,222,217
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+DB 102,15,56,222,225
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD[rax*1+rcx]
+ add rax,16
+ jmp NEAR $L$dec_loop6_enter
+ALIGN 16
+$L$dec_loop6:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+$L$dec_loop6_enter:
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$dec_loop6
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+DB 102,15,56,223,240
+DB 102,15,56,223,248
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_encrypt8:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,209
+ pxor xmm7,xmm0
+ pxor xmm8,xmm0
+DB 102,15,56,220,217
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD[rax*1+rcx]
+ add rax,16
+ jmp NEAR $L$enc_loop8_inner
+ALIGN 16
+$L$enc_loop8:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+$L$enc_loop8_inner:
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+$L$enc_loop8_enter:
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$enc_loop8
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+DB 102,15,56,221,232
+DB 102,15,56,221,240
+DB 102,15,56,221,248
+DB 102,68,15,56,221,192
+DB 102,68,15,56,221,200
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+_aesni_decrypt8:
+ movups xmm0,XMMWORD[rcx]
+ shl eax,4
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ pxor xmm4,xmm0
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ lea rcx,[32+rax*1+rcx]
+ neg rax
+DB 102,15,56,222,209
+ pxor xmm7,xmm0
+ pxor xmm8,xmm0
+DB 102,15,56,222,217
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD[rax*1+rcx]
+ add rax,16
+ jmp NEAR $L$dec_loop8_inner
+ALIGN 16
+$L$dec_loop8:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+$L$dec_loop8_inner:
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+$L$dec_loop8_enter:
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$dec_loop8
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+DB 102,15,56,223,224
+DB 102,15,56,223,232
+DB 102,15,56,223,240
+DB 102,15,56,223,248
+DB 102,68,15,56,223,192
+DB 102,68,15,56,223,200
+ DB 0F3h,0C3h ;repret
+
+global aesni_ecb_encrypt
+
+ALIGN 16
+aesni_ecb_encrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ecb_encrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+ lea rsp,[((-88))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+$L$ecb_enc_body:
+ and rdx,-16
+ jz NEAR $L$ecb_ret
+
+ mov eax,DWORD[240+rcx]
+ movups xmm0,XMMWORD[rcx]
+ mov r11,rcx
+ mov r10d,eax
+ test r8d,r8d
+ jz NEAR $L$ecb_decrypt
+
+ cmp rdx,0x80
+ jb NEAR $L$ecb_enc_tail
+
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqu xmm7,XMMWORD[80+rdi]
+ movdqu xmm8,XMMWORD[96+rdi]
+ movdqu xmm9,XMMWORD[112+rdi]
+ lea rdi,[128+rdi]
+ sub rdx,0x80
+ jmp NEAR $L$ecb_enc_loop8_enter
+ALIGN 16
+$L$ecb_enc_loop8:
+ movups XMMWORD[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD[rdi]
+ mov eax,r10d
+ movups XMMWORD[16+rsi],xmm3
+ movdqu xmm3,XMMWORD[16+rdi]
+ movups XMMWORD[32+rsi],xmm4
+ movdqu xmm4,XMMWORD[32+rdi]
+ movups XMMWORD[48+rsi],xmm5
+ movdqu xmm5,XMMWORD[48+rdi]
+ movups XMMWORD[64+rsi],xmm6
+ movdqu xmm6,XMMWORD[64+rdi]
+ movups XMMWORD[80+rsi],xmm7
+ movdqu xmm7,XMMWORD[80+rdi]
+ movups XMMWORD[96+rsi],xmm8
+ movdqu xmm8,XMMWORD[96+rdi]
+ movups XMMWORD[112+rsi],xmm9
+ lea rsi,[128+rsi]
+ movdqu xmm9,XMMWORD[112+rdi]
+ lea rdi,[128+rdi]
+$L$ecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ sub rdx,0x80
+ jnc NEAR $L$ecb_enc_loop8
+
+ movups XMMWORD[rsi],xmm2
+ mov rcx,r11
+ movups XMMWORD[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+ movups XMMWORD[80+rsi],xmm7
+ movups XMMWORD[96+rsi],xmm8
+ movups XMMWORD[112+rsi],xmm9
+ lea rsi,[128+rsi]
+ add rdx,0x80
+ jz NEAR $L$ecb_ret
+
+$L$ecb_enc_tail:
+ movups xmm2,XMMWORD[rdi]
+ cmp rdx,0x20
+ jb NEAR $L$ecb_enc_one
+ movups xmm3,XMMWORD[16+rdi]
+ je NEAR $L$ecb_enc_two
+ movups xmm4,XMMWORD[32+rdi]
+ cmp rdx,0x40
+ jb NEAR $L$ecb_enc_three
+ movups xmm5,XMMWORD[48+rdi]
+ je NEAR $L$ecb_enc_four
+ movups xmm6,XMMWORD[64+rdi]
+ cmp rdx,0x60
+ jb NEAR $L$ecb_enc_five
+ movups xmm7,XMMWORD[80+rdi]
+ je NEAR $L$ecb_enc_six
+ movdqu xmm8,XMMWORD[96+rdi]
+ xorps xmm9,xmm9
+ call _aesni_encrypt8
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+ movups XMMWORD[80+rsi],xmm7
+ movups XMMWORD[96+rsi],xmm8
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_one:
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_3:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_3
+DB 102,15,56,221,209
+ movups XMMWORD[rsi],xmm2
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_two:
+ call _aesni_encrypt2
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_three:
+ call _aesni_encrypt3
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_four:
+ call _aesni_encrypt4
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_five:
+ xorps xmm7,xmm7
+ call _aesni_encrypt6
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_six:
+ call _aesni_encrypt6
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+ movups XMMWORD[80+rsi],xmm7
+ jmp NEAR $L$ecb_ret
+
+ALIGN 16
+$L$ecb_decrypt:
+ cmp rdx,0x80
+ jb NEAR $L$ecb_dec_tail
+
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqu xmm7,XMMWORD[80+rdi]
+ movdqu xmm8,XMMWORD[96+rdi]
+ movdqu xmm9,XMMWORD[112+rdi]
+ lea rdi,[128+rdi]
+ sub rdx,0x80
+ jmp NEAR $L$ecb_dec_loop8_enter
+ALIGN 16
+$L$ecb_dec_loop8:
+ movups XMMWORD[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD[rdi]
+ mov eax,r10d
+ movups XMMWORD[16+rsi],xmm3
+ movdqu xmm3,XMMWORD[16+rdi]
+ movups XMMWORD[32+rsi],xmm4
+ movdqu xmm4,XMMWORD[32+rdi]
+ movups XMMWORD[48+rsi],xmm5
+ movdqu xmm5,XMMWORD[48+rdi]
+ movups XMMWORD[64+rsi],xmm6
+ movdqu xmm6,XMMWORD[64+rdi]
+ movups XMMWORD[80+rsi],xmm7
+ movdqu xmm7,XMMWORD[80+rdi]
+ movups XMMWORD[96+rsi],xmm8
+ movdqu xmm8,XMMWORD[96+rdi]
+ movups XMMWORD[112+rsi],xmm9
+ lea rsi,[128+rsi]
+ movdqu xmm9,XMMWORD[112+rdi]
+ lea rdi,[128+rdi]
+$L$ecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups xmm0,XMMWORD[r11]
+ sub rdx,0x80
+ jnc NEAR $L$ecb_dec_loop8
+
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ mov rcx,r11
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ mov eax,r10d
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ movups XMMWORD[80+rsi],xmm7
+ pxor xmm7,xmm7
+ movups XMMWORD[96+rsi],xmm8
+ pxor xmm8,xmm8
+ movups XMMWORD[112+rsi],xmm9
+ pxor xmm9,xmm9
+ lea rsi,[128+rsi]
+ add rdx,0x80
+ jz NEAR $L$ecb_ret
+
+$L$ecb_dec_tail:
+ movups xmm2,XMMWORD[rdi]
+ cmp rdx,0x20
+ jb NEAR $L$ecb_dec_one
+ movups xmm3,XMMWORD[16+rdi]
+ je NEAR $L$ecb_dec_two
+ movups xmm4,XMMWORD[32+rdi]
+ cmp rdx,0x40
+ jb NEAR $L$ecb_dec_three
+ movups xmm5,XMMWORD[48+rdi]
+ je NEAR $L$ecb_dec_four
+ movups xmm6,XMMWORD[64+rdi]
+ cmp rdx,0x60
+ jb NEAR $L$ecb_dec_five
+ movups xmm7,XMMWORD[80+rdi]
+ je NEAR $L$ecb_dec_six
+ movups xmm8,XMMWORD[96+rdi]
+ movups xmm0,XMMWORD[rcx]
+ xorps xmm9,xmm9
+ call _aesni_decrypt8
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ movups XMMWORD[80+rsi],xmm7
+ pxor xmm7,xmm7
+ movups XMMWORD[96+rsi],xmm8
+ pxor xmm8,xmm8
+ pxor xmm9,xmm9
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_one:
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_4:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_4
+DB 102,15,56,223,209
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_two:
+ call _aesni_decrypt2
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_three:
+ call _aesni_decrypt3
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_four:
+ call _aesni_decrypt4
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_five:
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ jmp NEAR $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_six:
+ call _aesni_decrypt6
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ movups XMMWORD[80+rsi],xmm7
+ pxor xmm7,xmm7
+
+$L$ecb_ret:
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ movaps xmm6,XMMWORD[rsp]
+ movaps XMMWORD[rsp],xmm0
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ lea rsp,[88+rsp]
+$L$ecb_enc_ret:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ecb_encrypt:
+global aesni_ccm64_encrypt_blocks
+
+ALIGN 16
+aesni_ccm64_encrypt_blocks:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_encrypt_blocks:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rsp,[((-88))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+$L$ccm64_enc_body:
+ mov eax,DWORD[240+rcx]
+ movdqu xmm6,XMMWORD[r8]
+ movdqa xmm9,XMMWORD[$L$increment64]
+ movdqa xmm7,XMMWORD[$L$bswap_mask]
+
+ shl eax,4
+ mov r10d,16
+ lea r11,[rcx]
+ movdqu xmm3,XMMWORD[r9]
+ movdqa xmm2,xmm6
+ lea rcx,[32+rax*1+rcx]
+DB 102,15,56,0,247
+ sub r10,rax
+ jmp NEAR $L$ccm64_enc_outer
+ALIGN 16
+$L$ccm64_enc_outer:
+ movups xmm0,XMMWORD[r11]
+ mov rax,r10
+ movups xmm8,XMMWORD[rdi]
+
+ xorps xmm2,xmm0
+ movups xmm1,XMMWORD[16+r11]
+ xorps xmm0,xmm8
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD[32+r11]
+
+$L$ccm64_enc2_loop:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ccm64_enc2_loop
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ paddq xmm6,xmm9
+ dec rdx
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+
+ lea rdi,[16+rdi]
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm6
+ movups XMMWORD[rsi],xmm8
+DB 102,15,56,0,215
+ lea rsi,[16+rsi]
+ jnz NEAR $L$ccm64_enc_outer
+
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ movups XMMWORD[r9],xmm3
+ pxor xmm3,xmm3
+ pxor xmm8,xmm8
+ pxor xmm6,xmm6
+ movaps xmm6,XMMWORD[rsp]
+ movaps XMMWORD[rsp],xmm0
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ lea rsp,[88+rsp]
+$L$ccm64_enc_ret:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_encrypt_blocks:
+global aesni_ccm64_decrypt_blocks
+
+ALIGN 16
+aesni_ccm64_decrypt_blocks:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_decrypt_blocks:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rsp,[((-88))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+$L$ccm64_dec_body:
+ mov eax,DWORD[240+rcx]
+ movups xmm6,XMMWORD[r8]
+ movdqu xmm3,XMMWORD[r9]
+ movdqa xmm9,XMMWORD[$L$increment64]
+ movdqa xmm7,XMMWORD[$L$bswap_mask]
+
+ movaps xmm2,xmm6
+ mov r10d,eax
+ mov r11,rcx
+DB 102,15,56,0,247
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_5:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_5
+DB 102,15,56,221,209
+ shl r10d,4
+ mov eax,16
+ movups xmm8,XMMWORD[rdi]
+ paddq xmm6,xmm9
+ lea rdi,[16+rdi]
+ sub rax,r10
+ lea rcx,[32+r10*1+r11]
+ mov r10,rax
+ jmp NEAR $L$ccm64_dec_outer
+ALIGN 16
+$L$ccm64_dec_outer:
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm6
+ movups XMMWORD[rsi],xmm8
+ lea rsi,[16+rsi]
+DB 102,15,56,0,215
+
+ sub rdx,1
+ jz NEAR $L$ccm64_dec_break
+
+ movups xmm0,XMMWORD[r11]
+ mov rax,r10
+ movups xmm1,XMMWORD[16+r11]
+ xorps xmm8,xmm0
+ xorps xmm2,xmm0
+ xorps xmm3,xmm8
+ movups xmm0,XMMWORD[32+r11]
+ jmp NEAR $L$ccm64_dec2_loop
+ALIGN 16
+$L$ccm64_dec2_loop:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ccm64_dec2_loop
+ movups xmm8,XMMWORD[rdi]
+ paddq xmm6,xmm9
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+ lea rdi,[16+rdi]
+ jmp NEAR $L$ccm64_dec_outer
+
+ALIGN 16
+$L$ccm64_dec_break:
+
+ mov eax,DWORD[240+r11]
+ movups xmm0,XMMWORD[r11]
+ movups xmm1,XMMWORD[16+r11]
+ xorps xmm8,xmm0
+ lea r11,[32+r11]
+ xorps xmm3,xmm8
+$L$oop_enc1_6:
+DB 102,15,56,220,217
+ dec eax
+ movups xmm1,XMMWORD[r11]
+ lea r11,[16+r11]
+ jnz NEAR $L$oop_enc1_6
+DB 102,15,56,221,217
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ movups XMMWORD[r9],xmm3
+ pxor xmm3,xmm3
+ pxor xmm8,xmm8
+ pxor xmm6,xmm6
+ movaps xmm6,XMMWORD[rsp]
+ movaps XMMWORD[rsp],xmm0
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ lea rsp,[88+rsp]
+$L$ccm64_dec_ret:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_decrypt_blocks:
+global aesni_ctr32_encrypt_blocks
+
+ALIGN 16
+aesni_ctr32_encrypt_blocks:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ctr32_encrypt_blocks:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+ cmp rdx,1
+ jne NEAR $L$ctr32_bulk
+
+
+
+ movups xmm2,XMMWORD[r8]
+ movups xmm3,XMMWORD[rdi]
+ mov edx,DWORD[240+rcx]
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_7:
+DB 102,15,56,220,209
+ dec edx
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_7
+DB 102,15,56,221,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ xorps xmm2,xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[rsi],xmm2
+ xorps xmm2,xmm2
+ jmp NEAR $L$ctr32_epilogue
+
+ALIGN 16
+$L$ctr32_bulk:
+ lea rax,[rsp]
+ push rbp
+ sub rsp,288
+ and rsp,-16
+ movaps XMMWORD[(-168)+rax],xmm6
+ movaps XMMWORD[(-152)+rax],xmm7
+ movaps XMMWORD[(-136)+rax],xmm8
+ movaps XMMWORD[(-120)+rax],xmm9
+ movaps XMMWORD[(-104)+rax],xmm10
+ movaps XMMWORD[(-88)+rax],xmm11
+ movaps XMMWORD[(-72)+rax],xmm12
+ movaps XMMWORD[(-56)+rax],xmm13
+ movaps XMMWORD[(-40)+rax],xmm14
+ movaps XMMWORD[(-24)+rax],xmm15
+$L$ctr32_body:
+ lea rbp,[((-8))+rax]
+
+
+
+
+ movdqu xmm2,XMMWORD[r8]
+ movdqu xmm0,XMMWORD[rcx]
+ mov r8d,DWORD[12+r8]
+ pxor xmm2,xmm0
+ mov r11d,DWORD[12+rcx]
+ movdqa XMMWORD[rsp],xmm2
+ bswap r8d
+ movdqa xmm3,xmm2
+ movdqa xmm4,xmm2
+ movdqa xmm5,xmm2
+ movdqa XMMWORD[64+rsp],xmm2
+ movdqa XMMWORD[80+rsp],xmm2
+ movdqa XMMWORD[96+rsp],xmm2
+ mov r10,rdx
+ movdqa XMMWORD[112+rsp],xmm2
+
+ lea rax,[1+r8]
+ lea rdx,[2+r8]
+ bswap eax
+ bswap edx
+ xor eax,r11d
+ xor edx,r11d
+DB 102,15,58,34,216,3
+ lea rax,[3+r8]
+ movdqa XMMWORD[16+rsp],xmm3
+DB 102,15,58,34,226,3
+ bswap eax
+ mov rdx,r10
+ lea r10,[4+r8]
+ movdqa XMMWORD[32+rsp],xmm4
+ xor eax,r11d
+ bswap r10d
+DB 102,15,58,34,232,3
+ xor r10d,r11d
+ movdqa XMMWORD[48+rsp],xmm5
+ lea r9,[5+r8]
+ mov DWORD[((64+12))+rsp],r10d
+ bswap r9d
+ lea r10,[6+r8]
+ mov eax,DWORD[240+rcx]
+ xor r9d,r11d
+ bswap r10d
+ mov DWORD[((80+12))+rsp],r9d
+ xor r10d,r11d
+ lea r9,[7+r8]
+ mov DWORD[((96+12))+rsp],r10d
+ bswap r9d
+ mov r10d,DWORD[((OPENSSL_ia32cap_P+4))]
+ xor r9d,r11d
+ and r10d,71303168
+ mov DWORD[((112+12))+rsp],r9d
+
+ movups xmm1,XMMWORD[16+rcx]
+
+ movdqa xmm6,XMMWORD[64+rsp]
+ movdqa xmm7,XMMWORD[80+rsp]
+
+ cmp rdx,8
+ jb NEAR $L$ctr32_tail
+
+ sub rdx,6
+ cmp r10d,4194304
+ je NEAR $L$ctr32_6x
+
+ lea rcx,[128+rcx]
+ sub rdx,2
+ jmp NEAR $L$ctr32_loop8
+
+ALIGN 16
+$L$ctr32_6x:
+ shl eax,4
+ mov r10d,48
+ bswap r11d
+ lea rcx,[32+rax*1+rcx]
+ sub r10,rax
+ jmp NEAR $L$ctr32_loop6
+
+ALIGN 16
+$L$ctr32_loop6:
+ add r8d,6
+ movups xmm0,XMMWORD[((-48))+r10*1+rcx]
+DB 102,15,56,220,209
+ mov eax,r8d
+ xor eax,r11d
+DB 102,15,56,220,217
+DB 0x0f,0x38,0xf1,0x44,0x24,12
+ lea eax,[1+r8]
+DB 102,15,56,220,225
+ xor eax,r11d
+DB 0x0f,0x38,0xf1,0x44,0x24,28
+DB 102,15,56,220,233
+ lea eax,[2+r8]
+ xor eax,r11d
+DB 102,15,56,220,241
+DB 0x0f,0x38,0xf1,0x44,0x24,44
+ lea eax,[3+r8]
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[((-32))+r10*1+rcx]
+ xor eax,r11d
+
+DB 102,15,56,220,208
+DB 0x0f,0x38,0xf1,0x44,0x24,60
+ lea eax,[4+r8]
+DB 102,15,56,220,216
+ xor eax,r11d
+DB 0x0f,0x38,0xf1,0x44,0x24,76
+DB 102,15,56,220,224
+ lea eax,[5+r8]
+ xor eax,r11d
+DB 102,15,56,220,232
+DB 0x0f,0x38,0xf1,0x44,0x24,92
+ mov rax,r10
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[((-16))+r10*1+rcx]
+
+ call $L$enc_loop6
+
+ movdqu xmm8,XMMWORD[rdi]
+ movdqu xmm9,XMMWORD[16+rdi]
+ movdqu xmm10,XMMWORD[32+rdi]
+ movdqu xmm11,XMMWORD[48+rdi]
+ movdqu xmm12,XMMWORD[64+rdi]
+ movdqu xmm13,XMMWORD[80+rdi]
+ lea rdi,[96+rdi]
+ movups xmm1,XMMWORD[((-64))+r10*1+rcx]
+ pxor xmm8,xmm2
+ movaps xmm2,XMMWORD[rsp]
+ pxor xmm9,xmm3
+ movaps xmm3,XMMWORD[16+rsp]
+ pxor xmm10,xmm4
+ movaps xmm4,XMMWORD[32+rsp]
+ pxor xmm11,xmm5
+ movaps xmm5,XMMWORD[48+rsp]
+ pxor xmm12,xmm6
+ movaps xmm6,XMMWORD[64+rsp]
+ pxor xmm13,xmm7
+ movaps xmm7,XMMWORD[80+rsp]
+ movdqu XMMWORD[rsi],xmm8
+ movdqu XMMWORD[16+rsi],xmm9
+ movdqu XMMWORD[32+rsi],xmm10
+ movdqu XMMWORD[48+rsi],xmm11
+ movdqu XMMWORD[64+rsi],xmm12
+ movdqu XMMWORD[80+rsi],xmm13
+ lea rsi,[96+rsi]
+
+ sub rdx,6
+ jnc NEAR $L$ctr32_loop6
+
+ add rdx,6
+ jz NEAR $L$ctr32_done
+
+ lea eax,[((-48))+r10]
+ lea rcx,[((-80))+r10*1+rcx]
+ neg eax
+ shr eax,4
+ jmp NEAR $L$ctr32_tail
+
+ALIGN 32
+$L$ctr32_loop8:
+ add r8d,8
+ movdqa xmm8,XMMWORD[96+rsp]
+DB 102,15,56,220,209
+ mov r9d,r8d
+ movdqa xmm9,XMMWORD[112+rsp]
+DB 102,15,56,220,217
+ bswap r9d
+ movups xmm0,XMMWORD[((32-128))+rcx]
+DB 102,15,56,220,225
+ xor r9d,r11d
+ nop
+DB 102,15,56,220,233
+ mov DWORD[((0+12))+rsp],r9d
+ lea r9,[1+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((48-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD[((16+12))+rsp],r9d
+ lea r9,[2+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((64-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD[((32+12))+rsp],r9d
+ lea r9,[3+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((80-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD[((48+12))+rsp],r9d
+ lea r9,[4+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((96-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD[((64+12))+rsp],r9d
+ lea r9,[5+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((112-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ mov DWORD[((80+12))+rsp],r9d
+ lea r9,[6+r8]
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((128-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ xor r9d,r11d
+DB 0x66,0x90
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ mov DWORD[((96+12))+rsp],r9d
+ lea r9,[7+r8]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((144-128))+rcx]
+ bswap r9d
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+ xor r9d,r11d
+ movdqu xmm10,XMMWORD[rdi]
+DB 102,15,56,220,232
+ mov DWORD[((112+12))+rsp],r9d
+ cmp eax,11
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((160-128))+rcx]
+
+ jb NEAR $L$ctr32_enc_done
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((176-128))+rcx]
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((192-128))+rcx]
+ je NEAR $L$ctr32_enc_done
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movups xmm1,XMMWORD[((208-128))+rcx]
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+DB 102,68,15,56,220,192
+DB 102,68,15,56,220,200
+ movups xmm0,XMMWORD[((224-128))+rcx]
+ jmp NEAR $L$ctr32_enc_done
+
+ALIGN 16
+$L$ctr32_enc_done:
+ movdqu xmm11,XMMWORD[16+rdi]
+ pxor xmm10,xmm0
+ movdqu xmm12,XMMWORD[32+rdi]
+ pxor xmm11,xmm0
+ movdqu xmm13,XMMWORD[48+rdi]
+ pxor xmm12,xmm0
+ movdqu xmm14,XMMWORD[64+rdi]
+ pxor xmm13,xmm0
+ movdqu xmm15,XMMWORD[80+rdi]
+ pxor xmm14,xmm0
+ pxor xmm15,xmm0
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+DB 102,68,15,56,220,201
+ movdqu xmm1,XMMWORD[96+rdi]
+ lea rdi,[128+rdi]
+
+DB 102,65,15,56,221,210
+ pxor xmm1,xmm0
+ movdqu xmm10,XMMWORD[((112-128))+rdi]
+DB 102,65,15,56,221,219
+ pxor xmm10,xmm0
+ movdqa xmm11,XMMWORD[rsp]
+DB 102,65,15,56,221,228
+DB 102,65,15,56,221,237
+ movdqa xmm12,XMMWORD[16+rsp]
+ movdqa xmm13,XMMWORD[32+rsp]
+DB 102,65,15,56,221,246
+DB 102,65,15,56,221,255
+ movdqa xmm14,XMMWORD[48+rsp]
+ movdqa xmm15,XMMWORD[64+rsp]
+DB 102,68,15,56,221,193
+ movdqa xmm0,XMMWORD[80+rsp]
+ movups xmm1,XMMWORD[((16-128))+rcx]
+DB 102,69,15,56,221,202
+
+ movups XMMWORD[rsi],xmm2
+ movdqa xmm2,xmm11
+ movups XMMWORD[16+rsi],xmm3
+ movdqa xmm3,xmm12
+ movups XMMWORD[32+rsi],xmm4
+ movdqa xmm4,xmm13
+ movups XMMWORD[48+rsi],xmm5
+ movdqa xmm5,xmm14
+ movups XMMWORD[64+rsi],xmm6
+ movdqa xmm6,xmm15
+ movups XMMWORD[80+rsi],xmm7
+ movdqa xmm7,xmm0
+ movups XMMWORD[96+rsi],xmm8
+ movups XMMWORD[112+rsi],xmm9
+ lea rsi,[128+rsi]
+
+ sub rdx,8
+ jnc NEAR $L$ctr32_loop8
+
+ add rdx,8
+ jz NEAR $L$ctr32_done
+ lea rcx,[((-128))+rcx]
+
+$L$ctr32_tail:
+
+
+ lea rcx,[16+rcx]
+ cmp rdx,4
+ jb NEAR $L$ctr32_loop3
+ je NEAR $L$ctr32_loop4
+
+
+ shl eax,4
+ movdqa xmm8,XMMWORD[96+rsp]
+ pxor xmm9,xmm9
+
+ movups xmm0,XMMWORD[16+rcx]
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ lea rcx,[((32-16))+rax*1+rcx]
+ neg rax
+DB 102,15,56,220,225
+ add rax,16
+ movups xmm10,XMMWORD[rdi]
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+ movups xmm11,XMMWORD[16+rdi]
+ movups xmm12,XMMWORD[32+rdi]
+DB 102,15,56,220,249
+DB 102,68,15,56,220,193
+
+ call $L$enc_loop8_enter
+
+ movdqu xmm13,XMMWORD[48+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm10,XMMWORD[64+rdi]
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm6,xmm10
+ movdqu XMMWORD[48+rsi],xmm5
+ movdqu XMMWORD[64+rsi],xmm6
+ cmp rdx,6
+ jb NEAR $L$ctr32_done
+
+ movups xmm11,XMMWORD[80+rdi]
+ xorps xmm7,xmm11
+ movups XMMWORD[80+rsi],xmm7
+ je NEAR $L$ctr32_done
+
+ movups xmm12,XMMWORD[96+rdi]
+ xorps xmm8,xmm12
+ movups XMMWORD[96+rsi],xmm8
+ jmp NEAR $L$ctr32_done
+
+ALIGN 32
+$L$ctr32_loop4:
+DB 102,15,56,220,209
+ lea rcx,[16+rcx]
+ dec eax
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD[rcx]
+ jnz NEAR $L$ctr32_loop4
+DB 102,15,56,221,209
+DB 102,15,56,221,217
+ movups xmm10,XMMWORD[rdi]
+ movups xmm11,XMMWORD[16+rdi]
+DB 102,15,56,221,225
+DB 102,15,56,221,233
+ movups xmm12,XMMWORD[32+rdi]
+ movups xmm13,XMMWORD[48+rdi]
+
+ xorps xmm2,xmm10
+ movups XMMWORD[rsi],xmm2
+ xorps xmm3,xmm11
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm4,xmm12
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm5,xmm13
+ movdqu XMMWORD[48+rsi],xmm5
+ jmp NEAR $L$ctr32_done
+
+ALIGN 32
+$L$ctr32_loop3:
+DB 102,15,56,220,209
+ lea rcx,[16+rcx]
+ dec eax
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+ movups xmm1,XMMWORD[rcx]
+ jnz NEAR $L$ctr32_loop3
+DB 102,15,56,221,209
+DB 102,15,56,221,217
+DB 102,15,56,221,225
+
+ movups xmm10,XMMWORD[rdi]
+ xorps xmm2,xmm10
+ movups XMMWORD[rsi],xmm2
+ cmp rdx,2
+ jb NEAR $L$ctr32_done
+
+ movups xmm11,XMMWORD[16+rdi]
+ xorps xmm3,xmm11
+ movups XMMWORD[16+rsi],xmm3
+ je NEAR $L$ctr32_done
+
+ movups xmm12,XMMWORD[32+rdi]
+ xorps xmm4,xmm12
+ movups XMMWORD[32+rsi],xmm4
+
+$L$ctr32_done:
+ xorps xmm0,xmm0
+ xor r11d,r11d
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ movaps xmm6,XMMWORD[((-160))+rbp]
+ movaps XMMWORD[(-160)+rbp],xmm0
+ movaps xmm7,XMMWORD[((-144))+rbp]
+ movaps XMMWORD[(-144)+rbp],xmm0
+ movaps xmm8,XMMWORD[((-128))+rbp]
+ movaps XMMWORD[(-128)+rbp],xmm0
+ movaps xmm9,XMMWORD[((-112))+rbp]
+ movaps XMMWORD[(-112)+rbp],xmm0
+ movaps xmm10,XMMWORD[((-96))+rbp]
+ movaps XMMWORD[(-96)+rbp],xmm0
+ movaps xmm11,XMMWORD[((-80))+rbp]
+ movaps XMMWORD[(-80)+rbp],xmm0
+ movaps xmm12,XMMWORD[((-64))+rbp]
+ movaps XMMWORD[(-64)+rbp],xmm0
+ movaps xmm13,XMMWORD[((-48))+rbp]
+ movaps XMMWORD[(-48)+rbp],xmm0
+ movaps xmm14,XMMWORD[((-32))+rbp]
+ movaps XMMWORD[(-32)+rbp],xmm0
+ movaps xmm15,XMMWORD[((-16))+rbp]
+ movaps XMMWORD[(-16)+rbp],xmm0
+ movaps XMMWORD[rsp],xmm0
+ movaps XMMWORD[16+rsp],xmm0
+ movaps XMMWORD[32+rsp],xmm0
+ movaps XMMWORD[48+rsp],xmm0
+ movaps XMMWORD[64+rsp],xmm0
+ movaps XMMWORD[80+rsp],xmm0
+ movaps XMMWORD[96+rsp],xmm0
+ movaps XMMWORD[112+rsp],xmm0
+ lea rsp,[rbp]
+ pop rbp
+$L$ctr32_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ctr32_encrypt_blocks:
+global aesni_xts_encrypt
+
+ALIGN 16
+aesni_xts_encrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_encrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rax,[rsp]
+ push rbp
+ sub rsp,272
+ and rsp,-16
+ movaps XMMWORD[(-168)+rax],xmm6
+ movaps XMMWORD[(-152)+rax],xmm7
+ movaps XMMWORD[(-136)+rax],xmm8
+ movaps XMMWORD[(-120)+rax],xmm9
+ movaps XMMWORD[(-104)+rax],xmm10
+ movaps XMMWORD[(-88)+rax],xmm11
+ movaps XMMWORD[(-72)+rax],xmm12
+ movaps XMMWORD[(-56)+rax],xmm13
+ movaps XMMWORD[(-40)+rax],xmm14
+ movaps XMMWORD[(-24)+rax],xmm15
+$L$xts_enc_body:
+ lea rbp,[((-8))+rax]
+ movups xmm2,XMMWORD[r9]
+ mov eax,DWORD[240+r8]
+ mov r10d,DWORD[240+rcx]
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[16+r8]
+ lea r8,[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_8:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[r8]
+ lea r8,[16+r8]
+ jnz NEAR $L$oop_enc1_8
+DB 102,15,56,221,209
+ movups xmm0,XMMWORD[rcx]
+ mov r11,rcx
+ mov eax,r10d
+ shl r10d,4
+ mov r9,rdx
+ and rdx,-16
+
+ movups xmm1,XMMWORD[16+r10*1+rcx]
+
+ movdqa xmm8,XMMWORD[$L$xts_magic]
+ movdqa xmm15,xmm2
+ pshufd xmm9,xmm2,0x5f
+ pxor xmm1,xmm0
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm10,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm10,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm11,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm11,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm12,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm12,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm13,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm13,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm15
+ psrad xmm9,31
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pxor xmm14,xmm0
+ pxor xmm15,xmm9
+ movaps XMMWORD[96+rsp],xmm1
+
+ sub rdx,16*6
+ jc NEAR $L$xts_enc_short
+
+ mov eax,16+96
+ lea rcx,[32+r10*1+r11]
+ sub rax,r10
+ movups xmm1,XMMWORD[16+r11]
+ mov r10,rax
+ lea r8,[$L$xts_magic]
+ jmp NEAR $L$xts_enc_grandloop
+
+ALIGN 32
+$L$xts_enc_grandloop:
+ movdqu xmm2,XMMWORD[rdi]
+ movdqa xmm8,xmm0
+ movdqu xmm3,XMMWORD[16+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm4,XMMWORD[32+rdi]
+ pxor xmm3,xmm11
+DB 102,15,56,220,209
+ movdqu xmm5,XMMWORD[48+rdi]
+ pxor xmm4,xmm12
+DB 102,15,56,220,217
+ movdqu xmm6,XMMWORD[64+rdi]
+ pxor xmm5,xmm13
+DB 102,15,56,220,225
+ movdqu xmm7,XMMWORD[80+rdi]
+ pxor xmm8,xmm15
+ movdqa xmm9,XMMWORD[96+rsp]
+ pxor xmm6,xmm14
+DB 102,15,56,220,233
+ movups xmm0,XMMWORD[32+r11]
+ lea rdi,[96+rdi]
+ pxor xmm7,xmm8
+
+ pxor xmm10,xmm9
+DB 102,15,56,220,241
+ pxor xmm11,xmm9
+ movdqa XMMWORD[rsp],xmm10
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm12,xmm9
+
+DB 102,15,56,220,208
+ pxor xmm13,xmm9
+ movdqa XMMWORD[16+rsp],xmm11
+DB 102,15,56,220,216
+ pxor xmm14,xmm9
+ movdqa XMMWORD[32+rsp],xmm12
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ pxor xmm8,xmm9
+ movdqa XMMWORD[64+rsp],xmm14
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[64+r11]
+ movdqa XMMWORD[80+rsp],xmm8
+ pshufd xmm9,xmm15,0x5f
+ jmp NEAR $L$xts_enc_loop6
+ALIGN 32
+$L$xts_enc_loop6:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[((-64))+rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[((-80))+rax*1+rcx]
+ jnz NEAR $L$xts_enc_loop6
+
+ movdqa xmm8,XMMWORD[r8]
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ paddq xmm15,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,217
+ pand xmm14,xmm8
+ movups xmm10,XMMWORD[r11]
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+ pxor xmm15,xmm14
+ movaps xmm11,xmm10
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[((-64))+rcx]
+
+ movdqa xmm14,xmm9
+DB 102,15,56,220,208
+ paddd xmm9,xmm9
+ pxor xmm10,xmm15
+DB 102,15,56,220,216
+ psrad xmm14,31
+ paddq xmm15,xmm15
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ pand xmm14,xmm8
+ movaps xmm12,xmm11
+DB 102,15,56,220,240
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[((-48))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ pxor xmm11,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,217
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movdqa XMMWORD[48+rsp],xmm13
+ pxor xmm15,xmm14
+DB 102,15,56,220,241
+ movaps xmm13,xmm12
+ movdqa xmm14,xmm9
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[((-32))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,220,208
+ pxor xmm12,xmm15
+ psrad xmm14,31
+DB 102,15,56,220,216
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+ pxor xmm15,xmm14
+ movaps xmm14,xmm13
+DB 102,15,56,220,248
+
+ movdqa xmm0,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,220,209
+ pxor xmm13,xmm15
+ psrad xmm0,31
+DB 102,15,56,220,217
+ paddq xmm15,xmm15
+ pand xmm0,xmm8
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ pxor xmm15,xmm0
+ movups xmm0,XMMWORD[r11]
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[16+r11]
+
+ pxor xmm14,xmm15
+DB 102,15,56,221,84,36,0
+ psrad xmm9,31
+ paddq xmm15,xmm15
+DB 102,15,56,221,92,36,16
+DB 102,15,56,221,100,36,32
+ pand xmm9,xmm8
+ mov rax,r10
+DB 102,15,56,221,108,36,48
+DB 102,15,56,221,116,36,64
+DB 102,15,56,221,124,36,80
+ pxor xmm15,xmm9
+
+ lea rsi,[96+rsi]
+ movups XMMWORD[(-96)+rsi],xmm2
+ movups XMMWORD[(-80)+rsi],xmm3
+ movups XMMWORD[(-64)+rsi],xmm4
+ movups XMMWORD[(-48)+rsi],xmm5
+ movups XMMWORD[(-32)+rsi],xmm6
+ movups XMMWORD[(-16)+rsi],xmm7
+ sub rdx,16*6
+ jnc NEAR $L$xts_enc_grandloop
+
+ mov eax,16+96
+ sub eax,r10d
+ mov rcx,r11
+ shr eax,4
+
+$L$xts_enc_short:
+
+ mov r10d,eax
+ pxor xmm10,xmm0
+ add rdx,16*6
+ jz NEAR $L$xts_enc_done
+
+ pxor xmm11,xmm0
+ cmp rdx,0x20
+ jb NEAR $L$xts_enc_one
+ pxor xmm12,xmm0
+ je NEAR $L$xts_enc_two
+
+ pxor xmm13,xmm0
+ cmp rdx,0x40
+ jb NEAR $L$xts_enc_three
+ pxor xmm14,xmm0
+ je NEAR $L$xts_enc_four
+
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD[64+rdi]
+ lea rdi,[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+ pxor xmm7,xmm7
+
+ call _aesni_encrypt6
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD[32+rsi],xmm4
+ movdqu XMMWORD[48+rsi],xmm5
+ movdqu XMMWORD[64+rsi],xmm6
+ lea rsi,[80+rsi]
+ jmp NEAR $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_one:
+ movups xmm2,XMMWORD[rdi]
+ lea rdi,[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_9:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_9
+DB 102,15,56,221,209
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD[rsi],xmm2
+ lea rsi,[16+rsi]
+ jmp NEAR $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_two:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ lea rdi,[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_encrypt2
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ lea rsi,[32+rsi]
+ jmp NEAR $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_three:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ movups xmm4,XMMWORD[32+rdi]
+ lea rdi,[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ lea rsi,[48+rsi]
+ jmp NEAR $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_four:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ movups xmm4,XMMWORD[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD[48+rdi]
+ lea rdi,[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_encrypt4
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm14
+ pxor xmm3,xmm11
+ pxor xmm4,xmm12
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm5,xmm13
+ movdqu XMMWORD[16+rsi],xmm3
+ movdqu XMMWORD[32+rsi],xmm4
+ movdqu XMMWORD[48+rsi],xmm5
+ lea rsi,[64+rsi]
+ jmp NEAR $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_done:
+ and r9,15
+ jz NEAR $L$xts_enc_ret
+ mov rdx,r9
+
+$L$xts_enc_steal:
+ movzx eax,BYTE[rdi]
+ movzx ecx,BYTE[((-16))+rsi]
+ lea rdi,[1+rdi]
+ mov BYTE[((-16))+rsi],al
+ mov BYTE[rsi],cl
+ lea rsi,[1+rsi]
+ sub rdx,1
+ jnz NEAR $L$xts_enc_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD[((-16))+rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_10:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_10
+DB 102,15,56,221,209
+ xorps xmm2,xmm10
+ movups XMMWORD[(-16)+rsi],xmm2
+
+$L$xts_enc_ret:
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ movaps xmm6,XMMWORD[((-160))+rbp]
+ movaps XMMWORD[(-160)+rbp],xmm0
+ movaps xmm7,XMMWORD[((-144))+rbp]
+ movaps XMMWORD[(-144)+rbp],xmm0
+ movaps xmm8,XMMWORD[((-128))+rbp]
+ movaps XMMWORD[(-128)+rbp],xmm0
+ movaps xmm9,XMMWORD[((-112))+rbp]
+ movaps XMMWORD[(-112)+rbp],xmm0
+ movaps xmm10,XMMWORD[((-96))+rbp]
+ movaps XMMWORD[(-96)+rbp],xmm0
+ movaps xmm11,XMMWORD[((-80))+rbp]
+ movaps XMMWORD[(-80)+rbp],xmm0
+ movaps xmm12,XMMWORD[((-64))+rbp]
+ movaps XMMWORD[(-64)+rbp],xmm0
+ movaps xmm13,XMMWORD[((-48))+rbp]
+ movaps XMMWORD[(-48)+rbp],xmm0
+ movaps xmm14,XMMWORD[((-32))+rbp]
+ movaps XMMWORD[(-32)+rbp],xmm0
+ movaps xmm15,XMMWORD[((-16))+rbp]
+ movaps XMMWORD[(-16)+rbp],xmm0
+ movaps XMMWORD[rsp],xmm0
+ movaps XMMWORD[16+rsp],xmm0
+ movaps XMMWORD[32+rsp],xmm0
+ movaps XMMWORD[48+rsp],xmm0
+ movaps XMMWORD[64+rsp],xmm0
+ movaps XMMWORD[80+rsp],xmm0
+ movaps XMMWORD[96+rsp],xmm0
+ lea rsp,[rbp]
+ pop rbp
+$L$xts_enc_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_encrypt:
+global aesni_xts_decrypt
+
+ALIGN 16
+aesni_xts_decrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_decrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rax,[rsp]
+ push rbp
+ sub rsp,272
+ and rsp,-16
+ movaps XMMWORD[(-168)+rax],xmm6
+ movaps XMMWORD[(-152)+rax],xmm7
+ movaps XMMWORD[(-136)+rax],xmm8
+ movaps XMMWORD[(-120)+rax],xmm9
+ movaps XMMWORD[(-104)+rax],xmm10
+ movaps XMMWORD[(-88)+rax],xmm11
+ movaps XMMWORD[(-72)+rax],xmm12
+ movaps XMMWORD[(-56)+rax],xmm13
+ movaps XMMWORD[(-40)+rax],xmm14
+ movaps XMMWORD[(-24)+rax],xmm15
+$L$xts_dec_body:
+ lea rbp,[((-8))+rax]
+ movups xmm2,XMMWORD[r9]
+ mov eax,DWORD[240+r8]
+ mov r10d,DWORD[240+rcx]
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[16+r8]
+ lea r8,[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_11:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[r8]
+ lea r8,[16+r8]
+ jnz NEAR $L$oop_enc1_11
+DB 102,15,56,221,209
+ xor eax,eax
+ test rdx,15
+ setnz al
+ shl rax,4
+ sub rdx,rax
+
+ movups xmm0,XMMWORD[rcx]
+ mov r11,rcx
+ mov eax,r10d
+ shl r10d,4
+ mov r9,rdx
+ and rdx,-16
+
+ movups xmm1,XMMWORD[16+r10*1+rcx]
+
+ movdqa xmm8,XMMWORD[$L$xts_magic]
+ movdqa xmm15,xmm2
+ pshufd xmm9,xmm2,0x5f
+ pxor xmm1,xmm0
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm10,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm10,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm11,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm11,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm12,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm12,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+ movdqa xmm13,xmm15
+ psrad xmm14,31
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+ pxor xmm13,xmm0
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm15
+ psrad xmm9,31
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pxor xmm14,xmm0
+ pxor xmm15,xmm9
+ movaps XMMWORD[96+rsp],xmm1
+
+ sub rdx,16*6
+ jc NEAR $L$xts_dec_short
+
+ mov eax,16+96
+ lea rcx,[32+r10*1+r11]
+ sub rax,r10
+ movups xmm1,XMMWORD[16+r11]
+ mov r10,rax
+ lea r8,[$L$xts_magic]
+ jmp NEAR $L$xts_dec_grandloop
+
+ALIGN 32
+$L$xts_dec_grandloop:
+ movdqu xmm2,XMMWORD[rdi]
+ movdqa xmm8,xmm0
+ movdqu xmm3,XMMWORD[16+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm4,XMMWORD[32+rdi]
+ pxor xmm3,xmm11
+DB 102,15,56,222,209
+ movdqu xmm5,XMMWORD[48+rdi]
+ pxor xmm4,xmm12
+DB 102,15,56,222,217
+ movdqu xmm6,XMMWORD[64+rdi]
+ pxor xmm5,xmm13
+DB 102,15,56,222,225
+ movdqu xmm7,XMMWORD[80+rdi]
+ pxor xmm8,xmm15
+ movdqa xmm9,XMMWORD[96+rsp]
+ pxor xmm6,xmm14
+DB 102,15,56,222,233
+ movups xmm0,XMMWORD[32+r11]
+ lea rdi,[96+rdi]
+ pxor xmm7,xmm8
+
+ pxor xmm10,xmm9
+DB 102,15,56,222,241
+ pxor xmm11,xmm9
+ movdqa XMMWORD[rsp],xmm10
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm12,xmm9
+
+DB 102,15,56,222,208
+ pxor xmm13,xmm9
+ movdqa XMMWORD[16+rsp],xmm11
+DB 102,15,56,222,216
+ pxor xmm14,xmm9
+ movdqa XMMWORD[32+rsp],xmm12
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ pxor xmm8,xmm9
+ movdqa XMMWORD[64+rsp],xmm14
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[64+r11]
+ movdqa XMMWORD[80+rsp],xmm8
+ pshufd xmm9,xmm15,0x5f
+ jmp NEAR $L$xts_dec_loop6
+ALIGN 32
+$L$xts_dec_loop6:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[((-64))+rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[((-80))+rax*1+rcx]
+ jnz NEAR $L$xts_dec_loop6
+
+ movdqa xmm8,XMMWORD[r8]
+ movdqa xmm14,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ paddq xmm15,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,217
+ pand xmm14,xmm8
+ movups xmm10,XMMWORD[r11]
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+ pxor xmm15,xmm14
+ movaps xmm11,xmm10
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[((-64))+rcx]
+
+ movdqa xmm14,xmm9
+DB 102,15,56,222,208
+ paddd xmm9,xmm9
+ pxor xmm10,xmm15
+DB 102,15,56,222,216
+ psrad xmm14,31
+ paddq xmm15,xmm15
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ pand xmm14,xmm8
+ movaps xmm12,xmm11
+DB 102,15,56,222,240
+ pxor xmm15,xmm14
+ movdqa xmm14,xmm9
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[((-48))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ pxor xmm11,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,217
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movdqa XMMWORD[48+rsp],xmm13
+ pxor xmm15,xmm14
+DB 102,15,56,222,241
+ movaps xmm13,xmm12
+ movdqa xmm14,xmm9
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[((-32))+rcx]
+
+ paddd xmm9,xmm9
+DB 102,15,56,222,208
+ pxor xmm12,xmm15
+ psrad xmm14,31
+DB 102,15,56,222,216
+ paddq xmm15,xmm15
+ pand xmm14,xmm8
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+ pxor xmm15,xmm14
+ movaps xmm14,xmm13
+DB 102,15,56,222,248
+
+ movdqa xmm0,xmm9
+ paddd xmm9,xmm9
+DB 102,15,56,222,209
+ pxor xmm13,xmm15
+ psrad xmm0,31
+DB 102,15,56,222,217
+ paddq xmm15,xmm15
+ pand xmm0,xmm8
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ pxor xmm15,xmm0
+ movups xmm0,XMMWORD[r11]
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[16+r11]
+
+ pxor xmm14,xmm15
+DB 102,15,56,223,84,36,0
+ psrad xmm9,31
+ paddq xmm15,xmm15
+DB 102,15,56,223,92,36,16
+DB 102,15,56,223,100,36,32
+ pand xmm9,xmm8
+ mov rax,r10
+DB 102,15,56,223,108,36,48
+DB 102,15,56,223,116,36,64
+DB 102,15,56,223,124,36,80
+ pxor xmm15,xmm9
+
+ lea rsi,[96+rsi]
+ movups XMMWORD[(-96)+rsi],xmm2
+ movups XMMWORD[(-80)+rsi],xmm3
+ movups XMMWORD[(-64)+rsi],xmm4
+ movups XMMWORD[(-48)+rsi],xmm5
+ movups XMMWORD[(-32)+rsi],xmm6
+ movups XMMWORD[(-16)+rsi],xmm7
+ sub rdx,16*6
+ jnc NEAR $L$xts_dec_grandloop
+
+ mov eax,16+96
+ sub eax,r10d
+ mov rcx,r11
+ shr eax,4
+
+$L$xts_dec_short:
+
+ mov r10d,eax
+ pxor xmm10,xmm0
+ pxor xmm11,xmm0
+ add rdx,16*6
+ jz NEAR $L$xts_dec_done
+
+ pxor xmm12,xmm0
+ cmp rdx,0x20
+ jb NEAR $L$xts_dec_one
+ pxor xmm13,xmm0
+ je NEAR $L$xts_dec_two
+
+ pxor xmm14,xmm0
+ cmp rdx,0x40
+ jb NEAR $L$xts_dec_three
+ je NEAR $L$xts_dec_four
+
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD[64+rdi]
+ lea rdi,[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_decrypt6
+
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm14,xmm14
+ movdqu XMMWORD[48+rsi],xmm5
+ pcmpgtd xmm14,xmm15
+ movdqu XMMWORD[64+rsi],xmm6
+ lea rsi,[80+rsi]
+ pshufd xmm11,xmm14,0x13
+ and r9,15
+ jz NEAR $L$xts_dec_ret
+
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm11,xmm8
+ pxor xmm11,xmm15
+ jmp NEAR $L$xts_dec_done2
+
+ALIGN 16
+$L$xts_dec_one:
+ movups xmm2,XMMWORD[rdi]
+ lea rdi,[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_12:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_12
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD[rsi],xmm2
+ movdqa xmm11,xmm12
+ lea rsi,[16+rsi]
+ jmp NEAR $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_two:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ lea rdi,[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_decrypt2
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm13
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ lea rsi,[32+rsi]
+ jmp NEAR $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_three:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ movups xmm4,XMMWORD[32+rdi]
+ lea rdi,[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_decrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm14
+ xorps xmm4,xmm12
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ lea rsi,[48+rsi]
+ jmp NEAR $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_four:
+ movups xmm2,XMMWORD[rdi]
+ movups xmm3,XMMWORD[16+rdi]
+ movups xmm4,XMMWORD[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD[48+rdi]
+ lea rdi,[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_decrypt4
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm14
+ pxor xmm3,xmm11
+ movdqa xmm11,xmm15
+ pxor xmm4,xmm12
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm5,xmm13
+ movdqu XMMWORD[16+rsi],xmm3
+ movdqu XMMWORD[32+rsi],xmm4
+ movdqu XMMWORD[48+rsi],xmm5
+ lea rsi,[64+rsi]
+ jmp NEAR $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_done:
+ and r9,15
+ jz NEAR $L$xts_dec_ret
+$L$xts_dec_done2:
+ mov rdx,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD[rdi]
+ xorps xmm2,xmm11
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_13:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_13
+DB 102,15,56,223,209
+ xorps xmm2,xmm11
+ movups XMMWORD[rsi],xmm2
+
+$L$xts_dec_steal:
+ movzx eax,BYTE[16+rdi]
+ movzx ecx,BYTE[rsi]
+ lea rdi,[1+rdi]
+ mov BYTE[rsi],al
+ mov BYTE[16+rsi],cl
+ lea rsi,[1+rsi]
+ sub rdx,1
+ jnz NEAR $L$xts_dec_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD[rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_14:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_14
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movups XMMWORD[rsi],xmm2
+
+$L$xts_dec_ret:
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ movaps xmm6,XMMWORD[((-160))+rbp]
+ movaps XMMWORD[(-160)+rbp],xmm0
+ movaps xmm7,XMMWORD[((-144))+rbp]
+ movaps XMMWORD[(-144)+rbp],xmm0
+ movaps xmm8,XMMWORD[((-128))+rbp]
+ movaps XMMWORD[(-128)+rbp],xmm0
+ movaps xmm9,XMMWORD[((-112))+rbp]
+ movaps XMMWORD[(-112)+rbp],xmm0
+ movaps xmm10,XMMWORD[((-96))+rbp]
+ movaps XMMWORD[(-96)+rbp],xmm0
+ movaps xmm11,XMMWORD[((-80))+rbp]
+ movaps XMMWORD[(-80)+rbp],xmm0
+ movaps xmm12,XMMWORD[((-64))+rbp]
+ movaps XMMWORD[(-64)+rbp],xmm0
+ movaps xmm13,XMMWORD[((-48))+rbp]
+ movaps XMMWORD[(-48)+rbp],xmm0
+ movaps xmm14,XMMWORD[((-32))+rbp]
+ movaps XMMWORD[(-32)+rbp],xmm0
+ movaps xmm15,XMMWORD[((-16))+rbp]
+ movaps XMMWORD[(-16)+rbp],xmm0
+ movaps XMMWORD[rsp],xmm0
+ movaps XMMWORD[16+rsp],xmm0
+ movaps XMMWORD[32+rsp],xmm0
+ movaps XMMWORD[48+rsp],xmm0
+ movaps XMMWORD[64+rsp],xmm0
+ movaps XMMWORD[80+rsp],xmm0
+ movaps XMMWORD[96+rsp],xmm0
+ lea rsp,[rbp]
+ pop rbp
+$L$xts_dec_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_decrypt:
+global aesni_ocb_encrypt
+
+ALIGN 32
+aesni_ocb_encrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ocb_encrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rax,[rsp]
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ lea rsp,[((-160))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[64+rsp],xmm10
+ movaps XMMWORD[80+rsp],xmm11
+ movaps XMMWORD[96+rsp],xmm12
+ movaps XMMWORD[112+rsp],xmm13
+ movaps XMMWORD[128+rsp],xmm14
+ movaps XMMWORD[144+rsp],xmm15
+$L$ocb_enc_body:
+ mov rbx,QWORD[56+rax]
+ mov rbp,QWORD[((56+8))+rax]
+
+ mov r10d,DWORD[240+rcx]
+ mov r11,rcx
+ shl r10d,4
+ movups xmm9,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+r10*1+rcx]
+
+ movdqu xmm15,XMMWORD[r9]
+ pxor xmm9,xmm1
+ pxor xmm15,xmm1
+
+ mov eax,16+32
+ lea rcx,[32+r10*1+r11]
+ movups xmm1,XMMWORD[16+r11]
+ sub rax,r10
+ mov r10,rax
+
+ movdqu xmm10,XMMWORD[rbx]
+ movdqu xmm8,XMMWORD[rbp]
+
+ test r8,1
+ jnz NEAR $L$ocb_enc_odd
+
+ bsf r12,r8
+ add r8,1
+ shl r12,4
+ movdqu xmm7,XMMWORD[r12*1+rbx]
+ movdqu xmm2,XMMWORD[rdi]
+ lea rdi,[16+rdi]
+
+ call __ocb_encrypt1
+
+ movdqa xmm15,xmm7
+ movups XMMWORD[rsi],xmm2
+ lea rsi,[16+rsi]
+ sub rdx,1
+ jz NEAR $L$ocb_enc_done
+
+$L$ocb_enc_odd:
+ lea r12,[1+r8]
+ lea r13,[3+r8]
+ lea r14,[5+r8]
+ lea r8,[6+r8]
+ bsf r12,r12
+ bsf r13,r13
+ bsf r14,r14
+ shl r12,4
+ shl r13,4
+ shl r14,4
+
+ sub rdx,6
+ jc NEAR $L$ocb_enc_short
+ jmp NEAR $L$ocb_enc_grandloop
+
+ALIGN 32
+$L$ocb_enc_grandloop:
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqu xmm7,XMMWORD[80+rdi]
+ lea rdi,[96+rdi]
+
+ call __ocb_encrypt6
+
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+ movups XMMWORD[80+rsi],xmm7
+ lea rsi,[96+rsi]
+ sub rdx,6
+ jnc NEAR $L$ocb_enc_grandloop
+
+$L$ocb_enc_short:
+ add rdx,6
+ jz NEAR $L$ocb_enc_done
+
+ movdqu xmm2,XMMWORD[rdi]
+ cmp rdx,2
+ jb NEAR $L$ocb_enc_one
+ movdqu xmm3,XMMWORD[16+rdi]
+ je NEAR $L$ocb_enc_two
+
+ movdqu xmm4,XMMWORD[32+rdi]
+ cmp rdx,4
+ jb NEAR $L$ocb_enc_three
+ movdqu xmm5,XMMWORD[48+rdi]
+ je NEAR $L$ocb_enc_four
+
+ movdqu xmm6,XMMWORD[64+rdi]
+ pxor xmm7,xmm7
+
+ call __ocb_encrypt6
+
+ movdqa xmm15,xmm14
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+ movups XMMWORD[64+rsi],xmm6
+
+ jmp NEAR $L$ocb_enc_done
+
+ALIGN 16
+$L$ocb_enc_one:
+ movdqa xmm7,xmm10
+
+ call __ocb_encrypt1
+
+ movdqa xmm15,xmm7
+ movups XMMWORD[rsi],xmm2
+ jmp NEAR $L$ocb_enc_done
+
+ALIGN 16
+$L$ocb_enc_two:
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+
+ call __ocb_encrypt4
+
+ movdqa xmm15,xmm11
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+
+ jmp NEAR $L$ocb_enc_done
+
+ALIGN 16
+$L$ocb_enc_three:
+ pxor xmm5,xmm5
+
+ call __ocb_encrypt4
+
+ movdqa xmm15,xmm12
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+
+ jmp NEAR $L$ocb_enc_done
+
+ALIGN 16
+$L$ocb_enc_four:
+ call __ocb_encrypt4
+
+ movdqa xmm15,xmm13
+ movups XMMWORD[rsi],xmm2
+ movups XMMWORD[16+rsi],xmm3
+ movups XMMWORD[32+rsi],xmm4
+ movups XMMWORD[48+rsi],xmm5
+
+$L$ocb_enc_done:
+ pxor xmm15,xmm0
+ movdqu XMMWORD[rbp],xmm8
+ movdqu XMMWORD[r9],xmm15
+
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ movaps xmm6,XMMWORD[rsp]
+ movaps XMMWORD[rsp],xmm0
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps XMMWORD[64+rsp],xmm0
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps XMMWORD[80+rsp],xmm0
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps XMMWORD[96+rsp],xmm0
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps XMMWORD[112+rsp],xmm0
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps XMMWORD[128+rsp],xmm0
+ movaps xmm15,XMMWORD[144+rsp]
+ movaps XMMWORD[144+rsp],xmm0
+ lea rax,[((160+40))+rsp]
+$L$ocb_enc_pop:
+ lea rsp,[160+rsp]
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+$L$ocb_enc_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ocb_encrypt:
+
+
+ALIGN 32
+__ocb_encrypt6:
+ pxor xmm15,xmm9
+ movdqu xmm11,XMMWORD[r12*1+rbx]
+ movdqa xmm12,xmm10
+ movdqu xmm13,XMMWORD[r13*1+rbx]
+ movdqa xmm14,xmm10
+ pxor xmm10,xmm15
+ movdqu xmm15,XMMWORD[r14*1+rbx]
+ pxor xmm11,xmm10
+ pxor xmm8,xmm2
+ pxor xmm2,xmm10
+ pxor xmm12,xmm11
+ pxor xmm8,xmm3
+ pxor xmm3,xmm11
+ pxor xmm13,xmm12
+ pxor xmm8,xmm4
+ pxor xmm4,xmm12
+ pxor xmm14,xmm13
+ pxor xmm8,xmm5
+ pxor xmm5,xmm13
+ pxor xmm15,xmm14
+ pxor xmm8,xmm6
+ pxor xmm6,xmm14
+ pxor xmm8,xmm7
+ pxor xmm7,xmm15
+ movups xmm0,XMMWORD[32+r11]
+
+ lea r12,[1+r8]
+ lea r13,[3+r8]
+ lea r14,[5+r8]
+ add r8,6
+ pxor xmm10,xmm9
+ bsf r12,r12
+ bsf r13,r13
+ bsf r14,r14
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ pxor xmm11,xmm9
+ pxor xmm12,xmm9
+DB 102,15,56,220,241
+ pxor xmm13,xmm9
+ pxor xmm14,xmm9
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm15,xmm9
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[64+r11]
+ shl r12,4
+ shl r13,4
+ jmp NEAR $L$ocb_enc_loop6
+
+ALIGN 32
+$L$ocb_enc_loop6:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+DB 102,15,56,220,240
+DB 102,15,56,220,248
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_enc_loop6
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+DB 102,15,56,220,241
+DB 102,15,56,220,249
+ movups xmm1,XMMWORD[16+r11]
+ shl r14,4
+
+DB 102,65,15,56,221,210
+ movdqu xmm10,XMMWORD[rbx]
+ mov rax,r10
+DB 102,65,15,56,221,219
+DB 102,65,15,56,221,228
+DB 102,65,15,56,221,237
+DB 102,65,15,56,221,246
+DB 102,65,15,56,221,255
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 32
+__ocb_encrypt4:
+ pxor xmm15,xmm9
+ movdqu xmm11,XMMWORD[r12*1+rbx]
+ movdqa xmm12,xmm10
+ movdqu xmm13,XMMWORD[r13*1+rbx]
+ pxor xmm10,xmm15
+ pxor xmm11,xmm10
+ pxor xmm8,xmm2
+ pxor xmm2,xmm10
+ pxor xmm12,xmm11
+ pxor xmm8,xmm3
+ pxor xmm3,xmm11
+ pxor xmm13,xmm12
+ pxor xmm8,xmm4
+ pxor xmm4,xmm12
+ pxor xmm8,xmm5
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD[32+r11]
+
+ pxor xmm10,xmm9
+ pxor xmm11,xmm9
+ pxor xmm12,xmm9
+ pxor xmm13,xmm9
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD[48+r11]
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ movups xmm0,XMMWORD[64+r11]
+ jmp NEAR $L$ocb_enc_loop4
+
+ALIGN 32
+$L$ocb_enc_loop4:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+DB 102,15,56,220,232
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_enc_loop4
+
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,220,233
+ movups xmm1,XMMWORD[16+r11]
+ mov rax,r10
+
+DB 102,65,15,56,221,210
+DB 102,65,15,56,221,219
+DB 102,65,15,56,221,228
+DB 102,65,15,56,221,237
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 32
+__ocb_encrypt1:
+ pxor xmm7,xmm15
+ pxor xmm7,xmm9
+ pxor xmm8,xmm2
+ pxor xmm2,xmm7
+ movups xmm0,XMMWORD[32+r11]
+
+DB 102,15,56,220,209
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm7,xmm9
+
+DB 102,15,56,220,208
+ movups xmm0,XMMWORD[64+r11]
+ jmp NEAR $L$ocb_enc_loop1
+
+ALIGN 32
+$L$ocb_enc_loop1:
+DB 102,15,56,220,209
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,220,208
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_enc_loop1
+
+DB 102,15,56,220,209
+ movups xmm1,XMMWORD[16+r11]
+ mov rax,r10
+
+DB 102,15,56,221,215
+ DB 0F3h,0C3h ;repret
+
+
+global aesni_ocb_decrypt
+
+ALIGN 32
+aesni_ocb_decrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ocb_decrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ lea rax,[rsp]
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ lea rsp,[((-160))+rsp]
+ movaps XMMWORD[rsp],xmm6
+ movaps XMMWORD[16+rsp],xmm7
+ movaps XMMWORD[32+rsp],xmm8
+ movaps XMMWORD[48+rsp],xmm9
+ movaps XMMWORD[64+rsp],xmm10
+ movaps XMMWORD[80+rsp],xmm11
+ movaps XMMWORD[96+rsp],xmm12
+ movaps XMMWORD[112+rsp],xmm13
+ movaps XMMWORD[128+rsp],xmm14
+ movaps XMMWORD[144+rsp],xmm15
+$L$ocb_dec_body:
+ mov rbx,QWORD[56+rax]
+ mov rbp,QWORD[((56+8))+rax]
+
+ mov r10d,DWORD[240+rcx]
+ mov r11,rcx
+ shl r10d,4
+ movups xmm9,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+r10*1+rcx]
+
+ movdqu xmm15,XMMWORD[r9]
+ pxor xmm9,xmm1
+ pxor xmm15,xmm1
+
+ mov eax,16+32
+ lea rcx,[32+r10*1+r11]
+ movups xmm1,XMMWORD[16+r11]
+ sub rax,r10
+ mov r10,rax
+
+ movdqu xmm10,XMMWORD[rbx]
+ movdqu xmm8,XMMWORD[rbp]
+
+ test r8,1
+ jnz NEAR $L$ocb_dec_odd
+
+ bsf r12,r8
+ add r8,1
+ shl r12,4
+ movdqu xmm7,XMMWORD[r12*1+rbx]
+ movdqu xmm2,XMMWORD[rdi]
+ lea rdi,[16+rdi]
+
+ call __ocb_decrypt1
+
+ movdqa xmm15,xmm7
+ movups XMMWORD[rsi],xmm2
+ xorps xmm8,xmm2
+ lea rsi,[16+rsi]
+ sub rdx,1
+ jz NEAR $L$ocb_dec_done
+
+$L$ocb_dec_odd:
+ lea r12,[1+r8]
+ lea r13,[3+r8]
+ lea r14,[5+r8]
+ lea r8,[6+r8]
+ bsf r12,r12
+ bsf r13,r13
+ bsf r14,r14
+ shl r12,4
+ shl r13,4
+ shl r14,4
+
+ sub rdx,6
+ jc NEAR $L$ocb_dec_short
+ jmp NEAR $L$ocb_dec_grandloop
+
+ALIGN 32
+$L$ocb_dec_grandloop:
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqu xmm7,XMMWORD[80+rdi]
+ lea rdi,[96+rdi]
+
+ call __ocb_decrypt6
+
+ movups XMMWORD[rsi],xmm2
+ pxor xmm8,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm8,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm8,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm8,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm8,xmm6
+ movups XMMWORD[80+rsi],xmm7
+ pxor xmm8,xmm7
+ lea rsi,[96+rsi]
+ sub rdx,6
+ jnc NEAR $L$ocb_dec_grandloop
+
+$L$ocb_dec_short:
+ add rdx,6
+ jz NEAR $L$ocb_dec_done
+
+ movdqu xmm2,XMMWORD[rdi]
+ cmp rdx,2
+ jb NEAR $L$ocb_dec_one
+ movdqu xmm3,XMMWORD[16+rdi]
+ je NEAR $L$ocb_dec_two
+
+ movdqu xmm4,XMMWORD[32+rdi]
+ cmp rdx,4
+ jb NEAR $L$ocb_dec_three
+ movdqu xmm5,XMMWORD[48+rdi]
+ je NEAR $L$ocb_dec_four
+
+ movdqu xmm6,XMMWORD[64+rdi]
+ pxor xmm7,xmm7
+
+ call __ocb_decrypt6
+
+ movdqa xmm15,xmm14
+ movups XMMWORD[rsi],xmm2
+ pxor xmm8,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm8,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm8,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm8,xmm5
+ movups XMMWORD[64+rsi],xmm6
+ pxor xmm8,xmm6
+
+ jmp NEAR $L$ocb_dec_done
+
+ALIGN 16
+$L$ocb_dec_one:
+ movdqa xmm7,xmm10
+
+ call __ocb_decrypt1
+
+ movdqa xmm15,xmm7
+ movups XMMWORD[rsi],xmm2
+ xorps xmm8,xmm2
+ jmp NEAR $L$ocb_dec_done
+
+ALIGN 16
+$L$ocb_dec_two:
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+
+ call __ocb_decrypt4
+
+ movdqa xmm15,xmm11
+ movups XMMWORD[rsi],xmm2
+ xorps xmm8,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ xorps xmm8,xmm3
+
+ jmp NEAR $L$ocb_dec_done
+
+ALIGN 16
+$L$ocb_dec_three:
+ pxor xmm5,xmm5
+
+ call __ocb_decrypt4
+
+ movdqa xmm15,xmm12
+ movups XMMWORD[rsi],xmm2
+ xorps xmm8,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ xorps xmm8,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ xorps xmm8,xmm4
+
+ jmp NEAR $L$ocb_dec_done
+
+ALIGN 16
+$L$ocb_dec_four:
+ call __ocb_decrypt4
+
+ movdqa xmm15,xmm13
+ movups XMMWORD[rsi],xmm2
+ pxor xmm8,xmm2
+ movups XMMWORD[16+rsi],xmm3
+ pxor xmm8,xmm3
+ movups XMMWORD[32+rsi],xmm4
+ pxor xmm8,xmm4
+ movups XMMWORD[48+rsi],xmm5
+ pxor xmm8,xmm5
+
+$L$ocb_dec_done:
+ pxor xmm15,xmm0
+ movdqu XMMWORD[rbp],xmm8
+ movdqu XMMWORD[r9],xmm15
+
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ movaps xmm6,XMMWORD[rsp]
+ movaps XMMWORD[rsp],xmm0
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps XMMWORD[64+rsp],xmm0
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps XMMWORD[80+rsp],xmm0
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps XMMWORD[96+rsp],xmm0
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps XMMWORD[112+rsp],xmm0
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps XMMWORD[128+rsp],xmm0
+ movaps xmm15,XMMWORD[144+rsp]
+ movaps XMMWORD[144+rsp],xmm0
+ lea rax,[((160+40))+rsp]
+$L$ocb_dec_pop:
+ lea rsp,[160+rsp]
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+$L$ocb_dec_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ocb_decrypt:
+
+
+ALIGN 32
+__ocb_decrypt6:
+ pxor xmm15,xmm9
+ movdqu xmm11,XMMWORD[r12*1+rbx]
+ movdqa xmm12,xmm10
+ movdqu xmm13,XMMWORD[r13*1+rbx]
+ movdqa xmm14,xmm10
+ pxor xmm10,xmm15
+ movdqu xmm15,XMMWORD[r14*1+rbx]
+ pxor xmm11,xmm10
+ pxor xmm2,xmm10
+ pxor xmm12,xmm11
+ pxor xmm3,xmm11
+ pxor xmm13,xmm12
+ pxor xmm4,xmm12
+ pxor xmm14,xmm13
+ pxor xmm5,xmm13
+ pxor xmm15,xmm14
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+ movups xmm0,XMMWORD[32+r11]
+
+ lea r12,[1+r8]
+ lea r13,[3+r8]
+ lea r14,[5+r8]
+ add r8,6
+ pxor xmm10,xmm9
+ bsf r12,r12
+ bsf r13,r13
+ bsf r14,r14
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ pxor xmm11,xmm9
+ pxor xmm12,xmm9
+DB 102,15,56,222,241
+ pxor xmm13,xmm9
+ pxor xmm14,xmm9
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm15,xmm9
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[64+r11]
+ shl r12,4
+ shl r13,4
+ jmp NEAR $L$ocb_dec_loop6
+
+ALIGN 32
+$L$ocb_dec_loop6:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_dec_loop6
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ movups xmm1,XMMWORD[16+r11]
+ shl r14,4
+
+DB 102,65,15,56,223,210
+ movdqu xmm10,XMMWORD[rbx]
+ mov rax,r10
+DB 102,65,15,56,223,219
+DB 102,65,15,56,223,228
+DB 102,65,15,56,223,237
+DB 102,65,15,56,223,246
+DB 102,65,15,56,223,255
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 32
+__ocb_decrypt4:
+ pxor xmm15,xmm9
+ movdqu xmm11,XMMWORD[r12*1+rbx]
+ movdqa xmm12,xmm10
+ movdqu xmm13,XMMWORD[r13*1+rbx]
+ pxor xmm10,xmm15
+ pxor xmm11,xmm10
+ pxor xmm2,xmm10
+ pxor xmm12,xmm11
+ pxor xmm3,xmm11
+ pxor xmm13,xmm12
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD[32+r11]
+
+ pxor xmm10,xmm9
+ pxor xmm11,xmm9
+ pxor xmm12,xmm9
+ pxor xmm13,xmm9
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movups xmm1,XMMWORD[48+r11]
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ movups xmm0,XMMWORD[64+r11]
+ jmp NEAR $L$ocb_dec_loop4
+
+ALIGN 32
+$L$ocb_dec_loop4:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_dec_loop4
+
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ movups xmm1,XMMWORD[16+r11]
+ mov rax,r10
+
+DB 102,65,15,56,223,210
+DB 102,65,15,56,223,219
+DB 102,65,15,56,223,228
+DB 102,65,15,56,223,237
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 32
+__ocb_decrypt1:
+ pxor xmm7,xmm15
+ pxor xmm7,xmm9
+ pxor xmm2,xmm7
+ movups xmm0,XMMWORD[32+r11]
+
+DB 102,15,56,222,209
+ movups xmm1,XMMWORD[48+r11]
+ pxor xmm7,xmm9
+
+DB 102,15,56,222,208
+ movups xmm0,XMMWORD[64+r11]
+ jmp NEAR $L$ocb_dec_loop1
+
+ALIGN 32
+$L$ocb_dec_loop1:
+DB 102,15,56,222,209
+ movups xmm1,XMMWORD[rax*1+rcx]
+ add rax,32
+
+DB 102,15,56,222,208
+ movups xmm0,XMMWORD[((-16))+rax*1+rcx]
+ jnz NEAR $L$ocb_dec_loop1
+
+DB 102,15,56,222,209
+ movups xmm1,XMMWORD[16+r11]
+ mov rax,r10
+
+DB 102,15,56,223,215
+ DB 0F3h,0C3h ;repret
+
+global aesni_cbc_encrypt
+
+ALIGN 16
+aesni_cbc_encrypt:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_encrypt:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+ mov r9,QWORD[48+rsp]
+
+
+ test rdx,rdx
+ jz NEAR $L$cbc_ret
+
+ mov r10d,DWORD[240+rcx]
+ mov r11,rcx
+ test r9d,r9d
+ jz NEAR $L$cbc_decrypt
+
+ movups xmm2,XMMWORD[r8]
+ mov eax,r10d
+ cmp rdx,16
+ jb NEAR $L$cbc_enc_tail
+ sub rdx,16
+ jmp NEAR $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop:
+ movups xmm3,XMMWORD[rdi]
+ lea rdi,[16+rdi]
+
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ xorps xmm3,xmm0
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm3
+$L$oop_enc1_15:
+DB 102,15,56,220,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_enc1_15
+DB 102,15,56,221,209
+ mov eax,r10d
+ mov rcx,r11
+ movups XMMWORD[rsi],xmm2
+ lea rsi,[16+rsi]
+ sub rdx,16
+ jnc NEAR $L$cbc_enc_loop
+ add rdx,16
+ jnz NEAR $L$cbc_enc_tail
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movups XMMWORD[r8],xmm2
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ jmp NEAR $L$cbc_ret
+
+$L$cbc_enc_tail:
+ mov rcx,rdx
+ xchg rsi,rdi
+ DD 0x9066A4F3
+ mov ecx,16
+ sub rcx,rdx
+ xor eax,eax
+ DD 0x9066AAF3
+ lea rdi,[((-16))+rdi]
+ mov eax,r10d
+ mov rsi,rdi
+ mov rcx,r11
+ xor rdx,rdx
+ jmp NEAR $L$cbc_enc_loop
+
+ALIGN 16
+$L$cbc_decrypt:
+ cmp rdx,16
+ jne NEAR $L$cbc_decrypt_bulk
+
+
+
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[r8]
+ movdqa xmm4,xmm2
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_16:
+DB 102,15,56,222,209
+ dec r10d
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_16
+DB 102,15,56,223,209
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ movdqu XMMWORD[r8],xmm4
+ xorps xmm2,xmm3
+ pxor xmm3,xmm3
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ jmp NEAR $L$cbc_ret
+ALIGN 16
+$L$cbc_decrypt_bulk:
+ lea rax,[rsp]
+ push rbp
+ sub rsp,176
+ and rsp,-16
+ movaps XMMWORD[16+rsp],xmm6
+ movaps XMMWORD[32+rsp],xmm7
+ movaps XMMWORD[48+rsp],xmm8
+ movaps XMMWORD[64+rsp],xmm9
+ movaps XMMWORD[80+rsp],xmm10
+ movaps XMMWORD[96+rsp],xmm11
+ movaps XMMWORD[112+rsp],xmm12
+ movaps XMMWORD[128+rsp],xmm13
+ movaps XMMWORD[144+rsp],xmm14
+ movaps XMMWORD[160+rsp],xmm15
+$L$cbc_decrypt_body:
+ lea rbp,[((-8))+rax]
+ movups xmm10,XMMWORD[r8]
+ mov eax,r10d
+ cmp rdx,0x50
+ jbe NEAR $L$cbc_dec_tail
+
+ movups xmm0,XMMWORD[rcx]
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqa xmm11,xmm2
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqa xmm12,xmm3
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqa xmm13,xmm4
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqa xmm14,xmm5
+ movdqu xmm7,XMMWORD[80+rdi]
+ movdqa xmm15,xmm6
+ mov r9d,DWORD[((OPENSSL_ia32cap_P+4))]
+ cmp rdx,0x70
+ jbe NEAR $L$cbc_dec_six_or_seven
+
+ and r9d,71303168
+ sub rdx,0x50
+ cmp r9d,4194304
+ je NEAR $L$cbc_dec_loop6_enter
+ sub rdx,0x20
+ lea rcx,[112+rcx]
+ jmp NEAR $L$cbc_dec_loop8_enter
+ALIGN 16
+$L$cbc_dec_loop8:
+ movups XMMWORD[rsi],xmm9
+ lea rsi,[16+rsi]
+$L$cbc_dec_loop8_enter:
+ movdqu xmm8,XMMWORD[96+rdi]
+ pxor xmm2,xmm0
+ movdqu xmm9,XMMWORD[112+rdi]
+ pxor xmm3,xmm0
+ movups xmm1,XMMWORD[((16-112))+rcx]
+ pxor xmm4,xmm0
+ xor r11,r11
+ cmp rdx,0x70
+ pxor xmm5,xmm0
+ pxor xmm6,xmm0
+ pxor xmm7,xmm0
+ pxor xmm8,xmm0
+
+DB 102,15,56,222,209
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD[((32-112))+rcx]
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+ setnc r11b
+ shl r11,7
+DB 102,68,15,56,222,201
+ add r11,rdi
+ movups xmm1,XMMWORD[((48-112))+rcx]
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((64-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD[((80-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((96-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD[((112-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((128-112))+rcx]
+ nop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD[((144-112))+rcx]
+ cmp eax,11
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((160-112))+rcx]
+ jb NEAR $L$cbc_dec_done
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD[((176-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((192-112))+rcx]
+ je NEAR $L$cbc_dec_done
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movups xmm1,XMMWORD[((208-112))+rcx]
+ nop
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+DB 102,15,56,222,224
+DB 102,15,56,222,232
+DB 102,15,56,222,240
+DB 102,15,56,222,248
+DB 102,68,15,56,222,192
+DB 102,68,15,56,222,200
+ movups xmm0,XMMWORD[((224-112))+rcx]
+ jmp NEAR $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_done:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+ pxor xmm10,xmm0
+ pxor xmm11,xmm0
+DB 102,15,56,222,225
+DB 102,15,56,222,233
+ pxor xmm12,xmm0
+ pxor xmm13,xmm0
+DB 102,15,56,222,241
+DB 102,15,56,222,249
+ pxor xmm14,xmm0
+ pxor xmm15,xmm0
+DB 102,68,15,56,222,193
+DB 102,68,15,56,222,201
+ movdqu xmm1,XMMWORD[80+rdi]
+
+DB 102,65,15,56,223,210
+ movdqu xmm10,XMMWORD[96+rdi]
+ pxor xmm1,xmm0
+DB 102,65,15,56,223,219
+ pxor xmm10,xmm0
+ movdqu xmm0,XMMWORD[112+rdi]
+DB 102,65,15,56,223,228
+ lea rdi,[128+rdi]
+ movdqu xmm11,XMMWORD[r11]
+DB 102,65,15,56,223,237
+DB 102,65,15,56,223,246
+ movdqu xmm12,XMMWORD[16+r11]
+ movdqu xmm13,XMMWORD[32+r11]
+DB 102,65,15,56,223,255
+DB 102,68,15,56,223,193
+ movdqu xmm14,XMMWORD[48+r11]
+ movdqu xmm15,XMMWORD[64+r11]
+DB 102,69,15,56,223,202
+ movdqa xmm10,xmm0
+ movdqu xmm1,XMMWORD[80+r11]
+ movups xmm0,XMMWORD[((-112))+rcx]
+
+ movups XMMWORD[rsi],xmm2
+ movdqa xmm2,xmm11
+ movups XMMWORD[16+rsi],xmm3
+ movdqa xmm3,xmm12
+ movups XMMWORD[32+rsi],xmm4
+ movdqa xmm4,xmm13
+ movups XMMWORD[48+rsi],xmm5
+ movdqa xmm5,xmm14
+ movups XMMWORD[64+rsi],xmm6
+ movdqa xmm6,xmm15
+ movups XMMWORD[80+rsi],xmm7
+ movdqa xmm7,xmm1
+ movups XMMWORD[96+rsi],xmm8
+ lea rsi,[112+rsi]
+
+ sub rdx,0x80
+ ja NEAR $L$cbc_dec_loop8
+
+ movaps xmm2,xmm9
+ lea rcx,[((-112))+rcx]
+ add rdx,0x70
+ jle NEAR $L$cbc_dec_clear_tail_collected
+ movups XMMWORD[rsi],xmm9
+ lea rsi,[16+rsi]
+ cmp rdx,0x50
+ jbe NEAR $L$cbc_dec_tail
+
+ movaps xmm2,xmm11
+$L$cbc_dec_six_or_seven:
+ cmp rdx,0x60
+ ja NEAR $L$cbc_dec_seven
+
+ movaps xmm8,xmm7
+ call _aesni_decrypt6
+ pxor xmm2,xmm10
+ movaps xmm10,xmm8
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ pxor xmm7,xmm15
+ movdqu XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ lea rsi,[80+rsi]
+ movdqa xmm2,xmm7
+ pxor xmm7,xmm7
+ jmp NEAR $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_seven:
+ movups xmm8,XMMWORD[96+rdi]
+ xorps xmm9,xmm9
+ call _aesni_decrypt8
+ movups xmm9,XMMWORD[80+rdi]
+ pxor xmm2,xmm10
+ movups xmm10,XMMWORD[96+rdi]
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ pxor xmm7,xmm15
+ movdqu XMMWORD[64+rsi],xmm6
+ pxor xmm6,xmm6
+ pxor xmm8,xmm9
+ movdqu XMMWORD[80+rsi],xmm7
+ pxor xmm7,xmm7
+ lea rsi,[96+rsi]
+ movdqa xmm2,xmm8
+ pxor xmm8,xmm8
+ pxor xmm9,xmm9
+ jmp NEAR $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_loop6:
+ movups XMMWORD[rsi],xmm7
+ lea rsi,[16+rsi]
+ movdqu xmm2,XMMWORD[rdi]
+ movdqu xmm3,XMMWORD[16+rdi]
+ movdqa xmm11,xmm2
+ movdqu xmm4,XMMWORD[32+rdi]
+ movdqa xmm12,xmm3
+ movdqu xmm5,XMMWORD[48+rdi]
+ movdqa xmm13,xmm4
+ movdqu xmm6,XMMWORD[64+rdi]
+ movdqa xmm14,xmm5
+ movdqu xmm7,XMMWORD[80+rdi]
+ movdqa xmm15,xmm6
+$L$cbc_dec_loop6_enter:
+ lea rdi,[96+rdi]
+ movdqa xmm8,xmm7
+
+ call _aesni_decrypt6
+
+ pxor xmm2,xmm10
+ movdqa xmm10,xmm8
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm6,xmm14
+ mov rcx,r11
+ movdqu XMMWORD[48+rsi],xmm5
+ pxor xmm7,xmm15
+ mov eax,r10d
+ movdqu XMMWORD[64+rsi],xmm6
+ lea rsi,[80+rsi]
+ sub rdx,0x60
+ ja NEAR $L$cbc_dec_loop6
+
+ movdqa xmm2,xmm7
+ add rdx,0x50
+ jle NEAR $L$cbc_dec_clear_tail_collected
+ movups XMMWORD[rsi],xmm7
+ lea rsi,[16+rsi]
+
+$L$cbc_dec_tail:
+ movups xmm2,XMMWORD[rdi]
+ sub rdx,0x10
+ jbe NEAR $L$cbc_dec_one
+
+ movups xmm3,XMMWORD[16+rdi]
+ movaps xmm11,xmm2
+ sub rdx,0x10
+ jbe NEAR $L$cbc_dec_two
+
+ movups xmm4,XMMWORD[32+rdi]
+ movaps xmm12,xmm3
+ sub rdx,0x10
+ jbe NEAR $L$cbc_dec_three
+
+ movups xmm5,XMMWORD[48+rdi]
+ movaps xmm13,xmm4
+ sub rdx,0x10
+ jbe NEAR $L$cbc_dec_four
+
+ movups xmm6,XMMWORD[64+rdi]
+ movaps xmm14,xmm5
+ movaps xmm15,xmm6
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ pxor xmm2,xmm10
+ movaps xmm10,xmm15
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ pxor xmm6,xmm14
+ movdqu XMMWORD[48+rsi],xmm5
+ pxor xmm5,xmm5
+ lea rsi,[64+rsi]
+ movdqa xmm2,xmm6
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ sub rdx,0x10
+ jmp NEAR $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_one:
+ movaps xmm11,xmm2
+ movups xmm0,XMMWORD[rcx]
+ movups xmm1,XMMWORD[16+rcx]
+ lea rcx,[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_17:
+DB 102,15,56,222,209
+ dec eax
+ movups xmm1,XMMWORD[rcx]
+ lea rcx,[16+rcx]
+ jnz NEAR $L$oop_dec1_17
+DB 102,15,56,223,209
+ xorps xmm2,xmm10
+ movaps xmm10,xmm11
+ jmp NEAR $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_two:
+ movaps xmm12,xmm3
+ call _aesni_decrypt2
+ pxor xmm2,xmm10
+ movaps xmm10,xmm12
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ movdqa xmm2,xmm3
+ pxor xmm3,xmm3
+ lea rsi,[16+rsi]
+ jmp NEAR $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_three:
+ movaps xmm13,xmm4
+ call _aesni_decrypt3
+ pxor xmm2,xmm10
+ movaps xmm10,xmm13
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ movdqa xmm2,xmm4
+ pxor xmm4,xmm4
+ lea rsi,[32+rsi]
+ jmp NEAR $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_four:
+ movaps xmm14,xmm5
+ call _aesni_decrypt4
+ pxor xmm2,xmm10
+ movaps xmm10,xmm14
+ pxor xmm3,xmm11
+ movdqu XMMWORD[rsi],xmm2
+ pxor xmm4,xmm12
+ movdqu XMMWORD[16+rsi],xmm3
+ pxor xmm3,xmm3
+ pxor xmm5,xmm13
+ movdqu XMMWORD[32+rsi],xmm4
+ pxor xmm4,xmm4
+ movdqa xmm2,xmm5
+ pxor xmm5,xmm5
+ lea rsi,[48+rsi]
+ jmp NEAR $L$cbc_dec_tail_collected
+
+ALIGN 16
+$L$cbc_dec_clear_tail_collected:
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+$L$cbc_dec_tail_collected:
+ movups XMMWORD[r8],xmm10
+ and rdx,15
+ jnz NEAR $L$cbc_dec_tail_partial
+ movups XMMWORD[rsi],xmm2
+ pxor xmm2,xmm2
+ jmp NEAR $L$cbc_dec_ret
+ALIGN 16
+$L$cbc_dec_tail_partial:
+ movaps XMMWORD[rsp],xmm2
+ pxor xmm2,xmm2
+ mov rcx,16
+ mov rdi,rsi
+ sub rcx,rdx
+ lea rsi,[rsp]
+ DD 0x9066A4F3
+ movdqa XMMWORD[rsp],xmm2
+
+$L$cbc_dec_ret:
+ xorps xmm0,xmm0
+ pxor xmm1,xmm1
+ movaps xmm6,XMMWORD[16+rsp]
+ movaps XMMWORD[16+rsp],xmm0
+ movaps xmm7,XMMWORD[32+rsp]
+ movaps XMMWORD[32+rsp],xmm0
+ movaps xmm8,XMMWORD[48+rsp]
+ movaps XMMWORD[48+rsp],xmm0
+ movaps xmm9,XMMWORD[64+rsp]
+ movaps XMMWORD[64+rsp],xmm0
+ movaps xmm10,XMMWORD[80+rsp]
+ movaps XMMWORD[80+rsp],xmm0
+ movaps xmm11,XMMWORD[96+rsp]
+ movaps XMMWORD[96+rsp],xmm0
+ movaps xmm12,XMMWORD[112+rsp]
+ movaps XMMWORD[112+rsp],xmm0
+ movaps xmm13,XMMWORD[128+rsp]
+ movaps XMMWORD[128+rsp],xmm0
+ movaps xmm14,XMMWORD[144+rsp]
+ movaps XMMWORD[144+rsp],xmm0
+ movaps xmm15,XMMWORD[160+rsp]
+ movaps XMMWORD[160+rsp],xmm0
+ lea rsp,[rbp]
+ pop rbp
+$L$cbc_ret:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_encrypt:
+global aesni_set_decrypt_key
+
+ALIGN 16
+aesni_set_decrypt_key:
+DB 0x48,0x83,0xEC,0x08
+ call __aesni_set_encrypt_key
+ shl edx,4
+ test eax,eax
+ jnz NEAR $L$dec_key_ret
+ lea rcx,[16+rdx*1+r8]
+
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[rcx]
+ movups XMMWORD[rcx],xmm0
+ movups XMMWORD[r8],xmm1
+ lea r8,[16+r8]
+ lea rcx,[((-16))+rcx]
+
+$L$dec_key_inverse:
+ movups xmm0,XMMWORD[r8]
+ movups xmm1,XMMWORD[rcx]
+DB 102,15,56,219,192
+DB 102,15,56,219,201
+ lea r8,[16+r8]
+ lea rcx,[((-16))+rcx]
+ movups XMMWORD[16+rcx],xmm0
+ movups XMMWORD[(-16)+r8],xmm1
+ cmp rcx,r8
+ ja NEAR $L$dec_key_inverse
+
+ movups xmm0,XMMWORD[r8]
+DB 102,15,56,219,192
+ pxor xmm1,xmm1
+ movups XMMWORD[rcx],xmm0
+ pxor xmm0,xmm0
+$L$dec_key_ret:
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_decrypt_key:
+
+global aesni_set_encrypt_key
+
+ALIGN 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+DB 0x48,0x83,0xEC,0x08
+ mov rax,-1
+ test rcx,rcx
+ jz NEAR $L$enc_key_ret
+ test r8,r8
+ jz NEAR $L$enc_key_ret
+
+ mov r10d,268437504
+ movups xmm0,XMMWORD[rcx]
+ xorps xmm4,xmm4
+ and r10d,DWORD[((OPENSSL_ia32cap_P+4))]
+ lea rax,[16+r8]
+ cmp edx,256
+ je NEAR $L$14rounds
+ cmp edx,192
+ je NEAR $L$12rounds
+ cmp edx,128
+ jne NEAR $L$bad_keybits
+
+$L$10rounds:
+ mov edx,9
+ cmp r10d,268435456
+ je NEAR $L$10rounds_alt
+
+ movups XMMWORD[r8],xmm0
+DB 102,15,58,223,200,1
+ call $L$key_expansion_128_cold
+DB 102,15,58,223,200,2
+ call $L$key_expansion_128
+DB 102,15,58,223,200,4
+ call $L$key_expansion_128
+DB 102,15,58,223,200,8
+ call $L$key_expansion_128
+DB 102,15,58,223,200,16
+ call $L$key_expansion_128
+DB 102,15,58,223,200,32
+ call $L$key_expansion_128
+DB 102,15,58,223,200,64
+ call $L$key_expansion_128
+DB 102,15,58,223,200,128
+ call $L$key_expansion_128
+DB 102,15,58,223,200,27
+ call $L$key_expansion_128
+DB 102,15,58,223,200,54
+ call $L$key_expansion_128
+ movups XMMWORD[rax],xmm0
+ mov DWORD[80+rax],edx
+ xor eax,eax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$10rounds_alt:
+ movdqa xmm5,XMMWORD[$L$key_rotate]
+ mov r10d,8
+ movdqa xmm4,XMMWORD[$L$key_rcon1]
+ movdqa xmm2,xmm0
+ movdqu XMMWORD[r8],xmm0
+ jmp NEAR $L$oop_key128
+
+ALIGN 16
+$L$oop_key128:
+DB 102,15,56,0,197
+DB 102,15,56,221,196
+ pslld xmm4,1
+ lea rax,[16+rax]
+
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+
+ pxor xmm0,xmm2
+ movdqu XMMWORD[(-16)+rax],xmm0
+ movdqa xmm2,xmm0
+
+ dec r10d
+ jnz NEAR $L$oop_key128
+
+ movdqa xmm4,XMMWORD[$L$key_rcon1b]
+
+DB 102,15,56,0,197
+DB 102,15,56,221,196
+ pslld xmm4,1
+
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+
+ pxor xmm0,xmm2
+ movdqu XMMWORD[rax],xmm0
+
+ movdqa xmm2,xmm0
+DB 102,15,56,0,197
+DB 102,15,56,221,196
+
+ movdqa xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm3,xmm2
+ pslldq xmm2,4
+ pxor xmm2,xmm3
+
+ pxor xmm0,xmm2
+ movdqu XMMWORD[16+rax],xmm0
+
+ mov DWORD[96+rax],edx
+ xor eax,eax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$12rounds:
+ movq xmm2,QWORD[16+rcx]
+ mov edx,11
+ cmp r10d,268435456
+ je NEAR $L$12rounds_alt
+
+ movups XMMWORD[r8],xmm0
+DB 102,15,58,223,202,1
+ call $L$key_expansion_192a_cold
+DB 102,15,58,223,202,2
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,4
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,8
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,16
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,32
+ call $L$key_expansion_192b
+DB 102,15,58,223,202,64
+ call $L$key_expansion_192a
+DB 102,15,58,223,202,128
+ call $L$key_expansion_192b
+ movups XMMWORD[rax],xmm0
+ mov DWORD[48+rax],edx
+ xor rax,rax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$12rounds_alt:
+ movdqa xmm5,XMMWORD[$L$key_rotate192]
+ movdqa xmm4,XMMWORD[$L$key_rcon1]
+ mov r10d,8
+ movdqu XMMWORD[r8],xmm0
+ jmp NEAR $L$oop_key192
+
+ALIGN 16
+$L$oop_key192:
+ movq QWORD[rax],xmm2
+ movdqa xmm1,xmm2
+DB 102,15,56,0,213
+DB 102,15,56,221,212
+ pslld xmm4,1
+ lea rax,[24+rax]
+
+ movdqa xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm0,xmm3
+
+ pshufd xmm3,xmm0,0xff
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+
+ pxor xmm0,xmm2
+ pxor xmm2,xmm3
+ movdqu XMMWORD[(-16)+rax],xmm0
+
+ dec r10d
+ jnz NEAR $L$oop_key192
+
+ mov DWORD[32+rax],edx
+ xor eax,eax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$14rounds:
+ movups xmm2,XMMWORD[16+rcx]
+ mov edx,13
+ lea rax,[16+rax]
+ cmp r10d,268435456
+ je NEAR $L$14rounds_alt
+
+ movups XMMWORD[r8],xmm0
+ movups XMMWORD[16+r8],xmm2
+DB 102,15,58,223,202,1
+ call $L$key_expansion_256a_cold
+DB 102,15,58,223,200,1
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,2
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,2
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,4
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,4
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,8
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,8
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,16
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,16
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,32
+ call $L$key_expansion_256a
+DB 102,15,58,223,200,32
+ call $L$key_expansion_256b
+DB 102,15,58,223,202,64
+ call $L$key_expansion_256a
+ movups XMMWORD[rax],xmm0
+ mov DWORD[16+rax],edx
+ xor rax,rax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$14rounds_alt:
+ movdqa xmm5,XMMWORD[$L$key_rotate]
+ movdqa xmm4,XMMWORD[$L$key_rcon1]
+ mov r10d,7
+ movdqu XMMWORD[r8],xmm0
+ movdqa xmm1,xmm2
+ movdqu XMMWORD[16+r8],xmm2
+ jmp NEAR $L$oop_key256
+
+ALIGN 16
+$L$oop_key256:
+DB 102,15,56,0,213
+DB 102,15,56,221,212
+
+ movdqa xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm3,xmm0
+ pslldq xmm0,4
+ pxor xmm0,xmm3
+ pslld xmm4,1
+
+ pxor xmm0,xmm2
+ movdqu XMMWORD[rax],xmm0
+
+ dec r10d
+ jz NEAR $L$done_key256
+
+ pshufd xmm2,xmm0,0xff
+ pxor xmm3,xmm3
+DB 102,15,56,221,211
+
+ movdqa xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm3,xmm1
+ pslldq xmm1,4
+ pxor xmm1,xmm3
+
+ pxor xmm2,xmm1
+ movdqu XMMWORD[16+rax],xmm2
+ lea rax,[32+rax]
+ movdqa xmm1,xmm2
+
+ jmp NEAR $L$oop_key256
+
+$L$done_key256:
+ mov DWORD[16+rax],edx
+ xor eax,eax
+ jmp NEAR $L$enc_key_ret
+
+ALIGN 16
+$L$bad_keybits:
+ mov rax,-2
+$L$enc_key_ret:
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_encrypt_key:
+
+ALIGN 16
+$L$key_expansion_128:
+ movups XMMWORD[rax],xmm0
+ lea rax,[16+rax]
+$L$key_expansion_128_cold:
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192a:
+ movups XMMWORD[rax],xmm0
+ lea rax,[16+rax]
+$L$key_expansion_192a_cold:
+ movaps xmm5,xmm2
+$L$key_expansion_192b_warm:
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255
+ pxor xmm2,xmm3
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192b:
+ movaps xmm3,xmm0
+ shufps xmm5,xmm0,68
+ movups XMMWORD[rax],xmm5
+ shufps xmm3,xmm2,78
+ movups XMMWORD[16+rax],xmm3
+ lea rax,[32+rax]
+ jmp NEAR $L$key_expansion_192b_warm
+
+ALIGN 16
+$L$key_expansion_256a:
+ movups XMMWORD[rax],xmm2
+ lea rax,[16+rax]
+$L$key_expansion_256a_cold:
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_256b:
+ movups XMMWORD[rax],xmm0
+ lea rax,[16+rax]
+
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170
+ xorps xmm2,xmm1
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 64
+$L$bswap_mask:
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32:
+ DD 6,6,6,0
+$L$increment64:
+ DD 1,0,0,0
+$L$xts_magic:
+ DD 0x87,0,1,0
+$L$increment1:
+DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+$L$key_rotate:
+ DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+$L$key_rotate192:
+ DD 0x04070605,0x04070605,0x04070605,0x04070605
+$L$key_rcon1:
+ DD 1,1,1,1
+$L$key_rcon1b:
+ DD 0x1b,0x1b,0x1b,0x1b
+
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+ecb_ccm64_se_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ lea rsi,[rax]
+ lea rdi,[512+r8]
+ mov ecx,8
+ DD 0xa548f3fc
+ lea rax,[88+rax]
+
+ jmp NEAR $L$common_seh_tail
+
+
+
+ALIGN 16
+ctr_xts_se_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ mov rax,QWORD[160+r8]
+ lea rsi,[((-160))+rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc
+
+ jmp NEAR $L$common_rbp_tail
+
+
+
+ALIGN 16
+ocb_se_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ mov r10d,DWORD[8+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$ocb_no_xmm
+
+ mov rax,QWORD[152+r8]
+
+ lea rsi,[rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc
+ lea rax,[((160+40))+rax]
+
+$L$ocb_no_xmm:
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+
+ mov QWORD[144+r8],rbx
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+
+ jmp NEAR $L$common_seh_tail
+
+
+ALIGN 16
+cbc_se_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[152+r8]
+ mov rbx,QWORD[248+r8]
+
+ lea r10,[$L$cbc_decrypt_bulk]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail
+
+ lea r10,[$L$cbc_decrypt_body]
+ cmp rbx,r10
+ jb NEAR $L$restore_cbc_rax
+
+ lea r10,[$L$cbc_ret]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail
+
+ lea rsi,[16+rax]
+ lea rdi,[512+r8]
+ mov ecx,20
+ DD 0xa548f3fc
+
+$L$common_rbp_tail:
+ mov rax,QWORD[160+r8]
+ mov rbp,QWORD[rax]
+ lea rax,[8+rax]
+ mov QWORD[160+r8],rbp
+ jmp NEAR $L$common_seh_tail
+
+$L$restore_cbc_rax:
+ mov rax,QWORD[120+r8]
+
+$L$common_seh_tail:
+ mov rdi,QWORD[8+rax]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD[8+rsi]
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[16+rsi]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4
+ALIGN 4
+ DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
+ DD $L$SEH_info_ecb wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_info_ccm64_enc wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
+ DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
+ DD $L$SEH_info_ccm64_dec wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
+ DD $L$SEH_info_ctr32 wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase
+ DD $L$SEH_info_xts_enc wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase
+ DD $L$SEH_info_xts_dec wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
+ DD $L$SEH_info_ocb_enc wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
+ DD $L$SEH_info_ocb_dec wrt ..imagebase
+ DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
+ DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
+ DD $L$SEH_info_cbc wrt ..imagebase
+
+ DD aesni_set_decrypt_key wrt ..imagebase
+ DD $L$SEH_end_set_decrypt_key wrt ..imagebase
+ DD $L$SEH_info_key wrt ..imagebase
+
+ DD aesni_set_encrypt_key wrt ..imagebase
+ DD $L$SEH_end_set_encrypt_key wrt ..imagebase
+ DD $L$SEH_info_key wrt ..imagebase
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_ecb:
+DB 9,0,0,0
+ DD ecb_ccm64_se_handler wrt ..imagebase
+ DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
+$L$SEH_info_ccm64_enc:
+DB 9,0,0,0
+ DD ecb_ccm64_se_handler wrt ..imagebase
+ DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
+$L$SEH_info_ccm64_dec:
+DB 9,0,0,0
+ DD ecb_ccm64_se_handler wrt ..imagebase
+ DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
+$L$SEH_info_ctr32:
+DB 9,0,0,0
+ DD ctr_xts_se_handler wrt ..imagebase
+ DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
+$L$SEH_info_xts_enc:
+DB 9,0,0,0
+ DD ctr_xts_se_handler wrt ..imagebase
+ DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
+$L$SEH_info_xts_dec:
+DB 9,0,0,0
+ DD ctr_xts_se_handler wrt ..imagebase
+ DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
+$L$SEH_info_ocb_enc:
+DB 9,0,0,0
+ DD ocb_se_handler wrt ..imagebase
+ DD $L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
+ DD $L$ocb_enc_pop wrt ..imagebase
+ DD 0
+$L$SEH_info_ocb_dec:
+DB 9,0,0,0
+ DD ocb_se_handler wrt ..imagebase
+ DD $L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
+ DD $L$ocb_dec_pop wrt ..imagebase
+ DD 0
+$L$SEH_info_cbc:
+DB 9,0,0,0
+ DD cbc_se_handler wrt ..imagebase
+$L$SEH_info_key:
+DB 0x01,0x04,0x01,0x00
+DB 0x04,0x02,0x00,0x00