summary refs log tree commit diff
path: root/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn')
-rw-r--r-- deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm 1515
-rw-r--r-- deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm 1252
-rw-r--r-- deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm 345
-rw-r--r-- deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm 479
4 files changed, 3591 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm
new file mode 100644
index 0000000000..82002b353b
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm
@@ -0,0 +1,1515 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64  ; "obj" output format: 32-bit code section named "code", 64-byte aligned
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1  ; defines the symbol @feat.00 = 1 (presumably the SAFESEH marker read by the Microsoft linker - confirm)
+section .text code align=64  ; "win32" output format: .text, 64-byte aligned
+%else
+section .text code  ; any other output format: .text with default alignment
+%endif
+;extern _OPENSSL_ia32cap_P  ; left disabled: the symbol is declared with "common" at the end of this file instead
+global _bn_mul_add_words  ; multiply-accumulate: r[i] += a[i]*w over num 32-bit words; the final carry word is returned in eax
+align 16
+_bn_mul_add_words:  ; stack args at entry: [4+esp]=r, [8+esp]=a, [12+esp]=num, [16+esp]=w (names assumed from the usual bn_* prototype - confirm)
+L$_bn_mul_add_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26  ; CF = bit 26 of the first capability dword (presumably the SSE2 flag, as the label names suggest)
+ jnc NEAR L$000maw_non_sse2  ; bit clear: take the integer-only path
+ mov eax,DWORD [4+esp]  ; eax = r (read and written)
+ mov edx,DWORD [8+esp]  ; edx = a (read only)
+ mov ecx,DWORD [12+esp]  ; ecx = num, words still to process
+ movd mm0,DWORD [16+esp]  ; mm0 = w
+ pxor mm1,mm1  ; mm1 = 64-bit running carry, starts at 0
+ test ecx,ecx  ; num == 0: nothing to do; without this guard the tail loop's "sub ecx,1 / jnz" wraps and runs 2^32 times
+ jz NEAR L$003maw_sse2_exit  ; return carry 0 without touching memory, as the integer-only path does
+ jmp NEAR L$001maw_sse2_entry
+align 16
+L$002maw_sse2_unrolled:  ; 8 words per iteration; the multiplies are issued ahead of the serial carry chain in mm1
+ movd mm3,DWORD [eax]  ; mm3 = r[0]
+ paddq mm1,mm3  ; carry += r[0]
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0  ; mm2 = a[0]*w (32x32 -> 64 bit)
+ movd mm4,DWORD [4+edx]
+ pmuludq mm4,mm0  ; mm4 = a[1]*w
+ movd mm6,DWORD [8+edx]
+ pmuludq mm6,mm0  ; mm6 = a[2]*w
+ movd mm7,DWORD [12+edx]
+ pmuludq mm7,mm0  ; mm7 = a[3]*w
+ paddq mm1,mm2  ; carry += a[0]*w
+ movd mm3,DWORD [4+eax]
+ paddq mm3,mm4  ; mm3 = r[1] + a[1]*w
+ movd mm5,DWORD [8+eax]
+ paddq mm5,mm6  ; mm5 = r[2] + a[2]*w
+ movd mm4,DWORD [12+eax]
+ paddq mm7,mm4  ; mm7 = r[3] + a[3]*w
+ movd DWORD [eax],mm1  ; r[0] = low 32 bits of the accumulator
+ movd mm2,DWORD [16+edx]
+ pmuludq mm2,mm0  ; mm2 = a[4]*w
+ psrlq mm1,32  ; keep the high 32 bits as the carry into the next word
+ movd mm4,DWORD [20+edx]
+ pmuludq mm4,mm0  ; mm4 = a[5]*w
+ paddq mm1,mm3
+ movd mm6,DWORD [24+edx]
+ pmuludq mm6,mm0  ; mm6 = a[6]*w
+ movd DWORD [4+eax],mm1  ; r[1]
+ psrlq mm1,32
+ movd mm3,DWORD [28+edx]
+ add edx,32  ; a += 8 words
+ pmuludq mm3,mm0  ; mm3 = a[7]*w
+ paddq mm1,mm5
+ movd mm5,DWORD [16+eax]
+ paddq mm2,mm5  ; mm2 = r[4] + a[4]*w
+ movd DWORD [8+eax],mm1  ; r[2]
+ psrlq mm1,32
+ paddq mm1,mm7
+ movd mm5,DWORD [20+eax]
+ paddq mm4,mm5  ; mm4 = r[5] + a[5]*w
+ movd DWORD [12+eax],mm1  ; r[3]
+ psrlq mm1,32
+ paddq mm1,mm2
+ movd mm5,DWORD [24+eax]
+ paddq mm6,mm5  ; mm6 = r[6] + a[6]*w
+ movd DWORD [16+eax],mm1  ; r[4]
+ psrlq mm1,32
+ paddq mm1,mm4
+ movd mm5,DWORD [28+eax]
+ paddq mm3,mm5  ; mm3 = r[7] + a[7]*w
+ movd DWORD [20+eax],mm1  ; r[5]
+ psrlq mm1,32
+ paddq mm1,mm6
+ movd DWORD [24+eax],mm1  ; r[6]
+ psrlq mm1,32
+ paddq mm1,mm3
+ movd DWORD [28+eax],mm1  ; r[7]
+ lea eax,[32+eax]  ; r += 8 words (lea leaves EFLAGS alone)
+ psrlq mm1,32
+ sub ecx,8
+ jz NEAR L$003maw_sse2_exit  ; count hit exactly zero: done
+L$001maw_sse2_entry:
+ test ecx,4294967288  ; 0xFFFFFFF8: is at least one full group of 8 words left?
+ jnz NEAR L$002maw_sse2_unrolled
+align 4
+L$004maw_sse2_loop:  ; one word per iteration; ecx is 1..7 whenever this loop is reached
+ movd mm2,DWORD [edx]
+ movd mm3,DWORD [eax]
+ pmuludq mm2,mm0  ; mm2 = a[i]*w
+ lea edx,[4+edx]
+ paddq mm1,mm3  ; carry += r[i]
+ paddq mm1,mm2  ; carry += a[i]*w
+ movd DWORD [eax],mm1  ; r[i] = low 32 bits
+ sub ecx,1
+ psrlq mm1,32  ; MMX shift and lea do not modify EFLAGS, so jnz below still tests the sub
+ lea eax,[4+eax]
+ jnz NEAR L$004maw_sse2_loop
+L$003maw_sse2_exit:
+ movd eax,mm1  ; return value = final carry word
+ emms  ; leave MMX state so the x87 register stack is usable again
+ ret
+align 16
+L$000maw_non_sse2:  ; integer-only path: esi = carry, edi = r, ebx = a, ebp = w, ecx = loop counter
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi  ; carry = 0
+ mov edi,DWORD [20+esp]  ; first argument (r); the four pushes moved it from [4+esp] to [20+esp]
+ mov ecx,DWORD [28+esp]  ; third argument (num)
+ mov ebx,DWORD [24+esp]  ; second argument (a)
+ and ecx,4294967288  ; ecx = num & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ mov ebp,DWORD [32+esp]  ; fourth argument (w)
+ push ecx  ; extra stack slot, only discarded by "pop ecx" at the end; num is at [32+esp] from here on
+ jz NEAR L$005maw_finish  ; mov/push do not change flags: ZF still comes from the "and" above
+align 16
+L$006maw_loop:
+ ; Round 0  (the round numbers in this loop are byte offsets: 0,4,...,28)
+ mov eax,DWORD [ebx]
+ mul ebp  ; edx:eax = a[i]*w
+ add eax,esi  ; add the incoming carry
+ adc edx,0
+ add eax,DWORD [edi]  ; add r[i]
+ adc edx,0
+ mov DWORD [edi],eax  ; r[i] = low word
+ mov esi,edx  ; carry = high word
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [28+edi]
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ sub ecx,8
+ lea ebx,[32+ebx]  ; lea advances the pointers without disturbing ZF from the sub
+ lea edi,[32+edi]
+ jnz NEAR L$006maw_loop
+L$005maw_finish:
+ mov ecx,DWORD [32+esp]  ; reload num (five pushes above the return address now)
+ and ecx,7  ; ecx = leftover words (0..7)
+ jnz NEAR L$007maw_finish2
+ jmp NEAR L$008maw_end
+L$007maw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [edi]
+ adc edx,0
+ dec ecx  ; sets ZF for the jz below; the two mov instructions in between do not touch flags
+ mov DWORD [edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 6  (last possible leftover word, so no counter check follows)
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$008maw_end:
+ mov eax,esi  ; return value = final carry word
+ pop ecx  ; drop the extra slot pushed before the main loop
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_mul_words  ; multiply: r[i] = low32(a[i]*w + carry) over num 32-bit words; the final carry word is returned in eax
+align 16
+_bn_mul_words:  ; stack args at entry: [4+esp]=r, [8+esp]=a, [12+esp]=num, [16+esp]=w (names assumed from the usual bn_* prototype - confirm)
+L$_bn_mul_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26  ; CF = bit 26 of the first capability dword (presumably the SSE2 flag, as the label names suggest)
+ jnc NEAR L$009mw_non_sse2  ; bit clear: take the integer-only path
+ mov eax,DWORD [4+esp]  ; eax = r (written)
+ mov edx,DWORD [8+esp]  ; edx = a (read)
+ mov ecx,DWORD [12+esp]  ; ecx = num, words still to process
+ movd mm0,DWORD [16+esp]  ; mm0 = w
+ pxor mm1,mm1  ; mm1 = 64-bit running carry, starts at 0
+ test ecx,ecx  ; num == 0: nothing to do; without this guard "sub ecx,1 / jnz" below wraps and runs 2^32 times
+ jz NEAR L$052mw_sse2_exit  ; return carry 0 without touching memory, as the integer-only path does
+align 16
+L$010mw_sse2_loop:  ; one word per iteration
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0  ; mm2 = a[i]*w (32x32 -> 64 bit)
+ lea edx,[4+edx]
+ paddq mm1,mm2  ; carry += a[i]*w
+ movd DWORD [eax],mm1  ; r[i] = low 32 bits
+ sub ecx,1
+ psrlq mm1,32  ; MMX shift and lea do not modify EFLAGS, so jnz below still tests the sub
+ lea eax,[4+eax]
+ jnz NEAR L$010mw_sse2_loop
+L$052mw_sse2_exit:
+ movd eax,mm1  ; return value = final carry word
+ emms  ; leave MMX state so the x87 register stack is usable again
+ ret
+align 16
+L$009mw_non_sse2:  ; integer-only path: esi = carry, edi = r, ebx = a, ebp = loop counter, ecx = w
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi  ; carry = 0
+ mov edi,DWORD [20+esp]  ; first argument (r); the four pushes moved it from [4+esp] to [20+esp]
+ mov ebx,DWORD [24+esp]  ; second argument (a)
+ mov ebp,DWORD [28+esp]  ; third argument (num)
+ mov ecx,DWORD [32+esp]  ; fourth argument (w)
+ and ebp,4294967288  ; ebp = num & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ jz NEAR L$011mw_finish
+L$012mw_loop:
+ ; Round 0  (the round numbers in this loop are byte offsets: 0,4,...,28)
+ mov eax,DWORD [ebx]
+ mul ecx  ; edx:eax = a[i]*w
+ add eax,esi  ; add the incoming carry
+ adc edx,0
+ mov DWORD [edi],eax  ; r[i] = low word
+ mov esi,edx  ; carry = high word
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ add ebx,32  ; a += 8 words
+ add edi,32  ; r += 8 words
+ sub ebp,8
+ jz NEAR L$011mw_finish
+ jmp NEAR L$012mw_loop
+L$011mw_finish:
+ mov ebp,DWORD [28+esp]  ; reload num
+ and ebp,7  ; ebp = leftover words (0..7)
+ jnz NEAR L$013mw_finish2
+ jmp NEAR L$014mw_end
+L$013mw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 6  (last possible leftover word, so no counter check follows)
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$014mw_end:
+ mov eax,esi  ; return value = final carry word
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sqr_words  ; square each word: (r[2i+1]:r[2i]) = a[i]*a[i] for num 32-bit input words; no carry between words
+align 16
+_bn_sqr_words:  ; stack args at entry: [4+esp]=r, [8+esp]=a, [12+esp]=num (names assumed from the usual bn_* prototype - confirm)
+L$_bn_sqr_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26  ; CF = bit 26 of the first capability dword (presumably the SSE2 flag, as the label names suggest)
+ jnc NEAR L$015sqr_non_sse2  ; bit clear: take the integer-only path
+ mov eax,DWORD [4+esp]  ; eax = r (written, two words per input word)
+ mov edx,DWORD [8+esp]  ; edx = a (read)
+ mov ecx,DWORD [12+esp]  ; ecx = num, words still to process
+ test ecx,ecx  ; num == 0: nothing to do; without this guard "sub ecx,1 / jnz" below wraps and runs 2^32 times
+ jz NEAR L$053sqr_sse2_exit  ; no MMX register has been touched yet, so the exit may skip emms
+align 16
+L$016sqr_sse2_loop:  ; one input word per iteration
+ movd mm0,DWORD [edx]
+ pmuludq mm0,mm0  ; mm0 = a[i]*a[i] (32x32 -> 64 bit)
+ lea edx,[4+edx]
+ movq [eax],mm0  ; store the whole 64-bit square: r[2i] and r[2i+1]
+ sub ecx,1
+ lea eax,[8+eax]  ; lea does not modify EFLAGS, so jnz below still tests the sub
+ jnz NEAR L$016sqr_sse2_loop
+ emms  ; leave MMX state so the x87 register stack is usable again
+L$053sqr_sse2_exit:
+ ret
+align 16
+L$015sqr_non_sse2:  ; integer-only path: esi = r, edi = a, ebx = loop counter
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov esi,DWORD [20+esp]  ; first argument (r); the four pushes moved it from [4+esp] to [20+esp]
+ mov edi,DWORD [24+esp]  ; second argument (a)
+ mov ebx,DWORD [28+esp]  ; third argument (num)
+ and ebx,4294967288  ; ebx = num & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ jz NEAR L$017sw_finish
+L$018sw_loop:
+ ; Round 0  (the round numbers in this loop are input byte offsets: 0,4,...,28)
+ mov eax,DWORD [edi]
+ mul eax  ; edx:eax = a[i]*a[i]
+ mov DWORD [esi],eax  ; low word of the square
+ mov DWORD [4+esi],edx  ; high word of the square
+ ; Round 4
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ mov DWORD [12+esi],edx
+ ; Round 8
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ mov DWORD [20+esi],edx
+ ; Round 12
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ mov DWORD [28+esi],edx
+ ; Round 16
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ mov DWORD [36+esi],edx
+ ; Round 20
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ mov DWORD [44+esi],edx
+ ; Round 24
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+ ; Round 28
+ mov eax,DWORD [28+edi]
+ mul eax
+ mov DWORD [56+esi],eax
+ mov DWORD [60+esi],edx
+ ;
+ add edi,32  ; a += 8 words
+ add esi,64  ; r += 16 words
+ sub ebx,8
+ jnz NEAR L$018sw_loop
+L$017sw_finish:
+ mov ebx,DWORD [28+esp]  ; reload num
+ and ebx,7  ; ebx = leftover words (0..7)
+ jz NEAR L$019sw_end
+ ; Tail Round 0
+ mov eax,DWORD [edi]
+ mul eax
+ mov DWORD [esi],eax
+ dec ebx  ; sets ZF for the jz below; the mov in between does not touch flags
+ mov DWORD [4+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ dec ebx
+ mov DWORD [12+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ dec ebx
+ mov DWORD [20+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ dec ebx
+ mov DWORD [28+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ dec ebx
+ mov DWORD [36+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ dec ebx
+ mov DWORD [44+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 6  (last possible leftover word, so no counter check follows)
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+L$019sw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_div_words  ; unsigned 64-by-32-bit division: returns (arg1:arg2) / arg3 in eax
+align 16
+_bn_div_words:  ; stack args at entry: [4+esp] = high dividend word, [8+esp] = low dividend word, [12+esp] = divisor
+L$_bn_div_words_begin:
+ mov ecx,DWORD [12+esp]  ; ecx = divisor
+ mov eax,DWORD [8+esp]  ; eax = low 32 bits of the dividend
+ mov edx,DWORD [4+esp]  ; edx = high 32 bits of the dividend
+ div ecx  ; eax = edx:eax / ecx, edx = remainder; the CPU raises #DE if ecx is 0 or the quotient does not fit in 32 bits
+ ret  ; quotient stays in eax as the return value
+global _bn_add_words  ; word-wise add with carry propagation: r[i] = a[i] + b[i] + carry; eax holds the carry at return
+align 16
+_bn_add_words:  ; stack args at entry: [4+esp], [8+esp], [12+esp], [16+esp] (presumably r, a, b, num - confirm against the C prototype)
+L$_bn_add_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]  ; ebx = first argument: destination pointer (the four pushes moved it from [4+esp])
+ mov esi,DWORD [24+esp]  ; esi = second argument: first source pointer
+ mov edi,DWORD [28+esp]  ; edi = third argument: second source pointer
+ mov ebp,DWORD [32+esp]  ; ebp = fourth argument: word count
+ xor eax,eax  ; eax = carry, starts at 0
+ and ebp,4294967288  ; ebp = count & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ jz NEAR L$020aw_finish
+L$021aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax  ; add the incoming carry; CF = carry out of this step
+ mov eax,0  ; mov (not xor) so that CF from the add survives
+ adc eax,eax  ; eax = 0 + 0 + CF
+ add ecx,edx  ; add the second operand word
+ adc eax,0  ; eax = total carry out (0 or 1)
+ mov DWORD [ebx],ecx  ; store the sum word
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32  ; advance all three pointers by 8 words
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$021aw_loop
+L$020aw_finish:
+ mov ebp,DWORD [32+esp]  ; reload the word count
+ and ebp,7  ; ebp = leftover words (0..7)
+ jz NEAR L$022aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp  ; sets ZF for the jz below; the mov in between does not touch flags
+ mov DWORD [ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 6  (last possible leftover word, so no counter check follows)
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$022aw_end:
+ pop edi  ; eax still holds the final carry (0 or 1)
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_words  ; word-wise subtract with borrow propagation: r[i] = a[i] - b[i] - borrow; eax holds the borrow at return
+align 16
+_bn_sub_words:  ; stack args at entry: [4+esp], [8+esp], [12+esp], [16+esp] (presumably r, a, b, num - confirm against the C prototype)
+L$_bn_sub_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]  ; ebx = first argument: destination pointer (the four pushes moved it from [4+esp])
+ mov esi,DWORD [24+esp]  ; esi = second argument: minuend pointer
+ mov edi,DWORD [28+esp]  ; edi = third argument: subtrahend pointer
+ mov ebp,DWORD [32+esp]  ; ebp = fourth argument: word count
+ xor eax,eax  ; eax = borrow, starts at 0
+ and ebp,4294967288  ; ebp = count & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ jz NEAR L$023aw_finish
+L$024aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax  ; subtract the incoming borrow; CF = borrow out of this step
+ mov eax,0  ; mov (not xor) so that CF from the sub survives
+ adc eax,eax  ; eax = 0 + 0 + CF
+ sub ecx,edx  ; subtract the second operand word
+ adc eax,0  ; eax = total borrow out (0 or 1)
+ mov DWORD [ebx],ecx  ; store the difference word
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32  ; advance all three pointers by 8 words
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$024aw_loop
+L$023aw_finish:
+ mov ebp,DWORD [32+esp]  ; reload the word count
+ and ebp,7  ; ebp = leftover words (0..7)
+ jz NEAR L$025aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp  ; sets ZF for the jz below; the mov in between does not touch flags
+ mov DWORD [ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 6  (last possible leftover word, so no counter check follows)
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$025aw_end:
+ pop edi  ; eax still holds the final borrow (0 or 1)
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_part_words  ; subtract two word arrays of unequal length; eax holds the borrow at return
+align 16
+_bn_sub_part_words:  ; stack args at entry: [4+esp]..[20+esp] (presumably r, a, b, cl = common length, dl = signed length difference - confirm)
+L$_bn_sub_part_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]  ; ebx = first argument: destination pointer (the four pushes moved it from [4+esp])
+ mov esi,DWORD [24+esp]  ; esi = second argument: minuend pointer
+ mov edi,DWORD [28+esp]  ; edi = third argument: subtrahend pointer
+ mov ebp,DWORD [32+esp]  ; ebp = fourth argument: number of words present in both inputs
+ xor eax,eax  ; eax = borrow, starts at 0
+ and ebp,4294967288  ; ebp = count & 0xFFFFFFF8: words handled by the 8-way unrolled loop
+ jz NEAR L$026aw_finish
+L$027aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax  ; subtract the incoming borrow; CF = borrow out of this step
+ mov eax,0  ; mov (not xor) so that CF from the sub survives
+ adc eax,eax  ; eax = 0 + 0 + CF
+ sub ecx,edx  ; subtract the second operand word
+ adc eax,0  ; eax = total borrow out (0 or 1)
+ mov DWORD [ebx],ecx  ; store the difference word
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32  ; advance all three pointers by 8 words
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$027aw_loop
+L$026aw_finish:
+ mov ebp,DWORD [32+esp]  ; reload the common word count
+ and ebp,7  ; ebp = leftover common words (0..7)
+ jz NEAR L$028aw_end
+ ; Tail Round 0  (tail rounds here advance the pointers word by word so they are exact for the second phase)
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+L$028aw_end:  ; second phase: handle the words present in only one input, chosen by the sign of the fifth argument
+ cmp DWORD [36+esp],0
+ je NEAR L$029pw_end  ; fifth argument == 0: both inputs have the same length, finished
+ mov ebp,DWORD [36+esp]
+ cmp ebp,0  ; repeats the test above; it also sets the flags used by the jge below
+ je NEAR L$029pw_end
+ jge NEAR L$030pw_pos  ; positive: extra words come from the minuend (esi)
+ ; pw_neg  (negative: extra words come from the subtrahend (edi); the minuend word is taken as 0)
+ mov edx,0
+ sub edx,ebp
+ mov ebp,edx  ; ebp = -(fifth argument) = number of extra words
+ and ebp,4294967288  ; words handled by the 8-way unrolled loop
+ jz NEAR L$031pw_neg_finish
+L$032pw_neg_loop:
+ ; dl<0 Round 0
+ mov ecx,0  ; minuend word is 0
+ mov edx,DWORD [edi]
+ sub ecx,eax  ; same borrow chain as in the first phase
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; dl<0 Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; dl<0 Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; dl<0 Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; dl<0 Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; dl<0 Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; dl<0 Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; dl<0 Round 7
+ mov ecx,0
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$032pw_neg_loop
+L$031pw_neg_finish:
+ mov edx,DWORD [36+esp]
+ mov ebp,0
+ sub ebp,edx  ; ebp = -(fifth argument) again
+ and ebp,7  ; leftover extra words (0..7)
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 0
+ mov ecx,0
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp  ; sets ZF for the jz below; the mov in between does not touch flags
+ mov DWORD [ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ jmp NEAR L$029pw_end
+L$030pw_pos:  ; extra words come from the minuend only: r[i] = a[i] - borrow
+ and ebp,4294967288  ; words handled by the 8-way unrolled loop
+ jz NEAR L$033pw_pos_finish
+L$034pw_pos_loop:  ; eax is never rewritten in this loop: a taken borrow means it stays 1
+ ; dl>0 Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax  ; CF set only if the borrow propagates further
+ mov DWORD [ebx],ecx
+ jnc NEAR L$035pw_nc0  ; borrow absorbed: the remaining words are plain copies
+ ; dl>0 Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$036pw_nc1
+ ; dl>0 Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$037pw_nc2
+ ; dl>0 Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$038pw_nc3
+ ; dl>0 Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$039pw_nc4
+ ; dl>0 Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$040pw_nc5
+ ; dl>0 Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$041pw_nc6
+ ; dl>0 Round 7
+ mov ecx,DWORD [28+esi]
+ sub ecx,eax
+ mov DWORD [28+ebx],ecx
+ jnc NEAR L$042pw_nc7
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$034pw_pos_loop
+L$033pw_pos_finish:
+ mov ebp,DWORD [36+esp]  ; reload the fifth argument
+ and ebp,7  ; leftover extra words (0..7)
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax
+ mov DWORD [ebx],ecx
+ jnc NEAR L$043pw_tail_nc0  ; borrow absorbed: continue in the copy-only tail (it does the dec ebp itself)
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$044pw_tail_nc1
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$045pw_tail_nc2
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$046pw_tail_nc3
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$047pw_tail_nc4
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$048pw_tail_nc5
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$049pw_tail_nc6
+ mov eax,1  ; the borrow ran through every extra word: return 1
+ jmp NEAR L$029pw_end
+L$050pw_nc_loop:  ; copy-only loop, entered mid-body through the pw_ncN labels once the borrow has been absorbed
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$035pw_nc0:
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$036pw_nc1:
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$037pw_nc2:
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$038pw_nc3:
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$039pw_nc4:
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$040pw_nc5:
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$041pw_nc6:
+ mov ecx,DWORD [28+esi]
+ mov DWORD [28+ebx],ecx
+L$042pw_nc7:
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$050pw_nc_loop
+ mov ebp,DWORD [36+esp]  ; reload the fifth argument
+ and ebp,7  ; leftover extra words (0..7)
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$043pw_tail_nc0:  ; each pw_tail_ncN label is entered right after word N has been stored
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$044pw_tail_nc1:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$045pw_tail_nc2:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$046pw_tail_nc3:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$047pw_tail_nc4:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$048pw_tail_nc5:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$049pw_tail_nc6:
+L$051pw_nc_end:
+ mov eax,0  ; the copy path is only reached after the borrow was absorbed: return 0
+L$029pw_end:
+ pop edi  ; eax holds the final borrow (0 or 1)
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+segment .bss
+common _OPENSSL_ia32cap_P 16  ; 16-byte common symbol; the routines in this file test bit 26 of its first dword to choose a code path
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm
new file mode 100644
index 0000000000..d57f0b5ffe
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm
@@ -0,0 +1,1252 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text code align=64
+%else
+section .text code
+%endif
+global _bn_mul_comba8 ; 8x8-word Comba multiply: r[0..15] = a[0..7] * b[0..7] (32-bit words)
+align 16
+_bn_mul_comba8: ; stack arguments above the return address: 1st = r, 2nd = a, 3rd = b
+L$_bn_mul_comba8_begin:
+ push esi ; esi, edi, ebp and ebx are saved here and restored before ret
+ mov esi,DWORD [12+esp] ; esi = a (2nd argument; one register pushed so far)
+ push edi
+ mov edi,DWORD [20+esp] ; edi = b (3rd argument; two registers pushed so far)
+ push ebp
+ push ebx
+ xor ebx,ebx ; ebx, ecx and ebp form a three-word column accumulator whose roles rotate each column
+ mov eax,DWORD [esi] ; eax = a[0]
+ xor ecx,ecx
+ mov edx,DWORD [edi] ; edx = b[0]
+ ; ################## Calculate word 0 (column sum: ebx low, ecx middle, ebp high)
+ xor ebp,ebp
+ ; mul a[0]*b[0]
+ mul edx ; edx:eax = eax * edx
+ add ebx,eax ; add the 64-bit product into the column sum
+ mov eax,DWORD [20+esp] ; eax = r (1st argument); mov does not touch the carry needed by the adc below
+ adc ecx,edx
+ mov edx,DWORD [edi] ; preload b[0] for the next column
+ adc ebp,0
+ mov DWORD [eax],ebx ; r[0] = low word of the column sum
+ mov eax,DWORD [4+esi] ; preload a[1] for the next column
+ ; saved r[0]
+ ; ################## Calculate word 1 (column sum: ecx low, ebp middle, ebx high)
+ xor ebx,ebx ; the word just stored becomes the new (cleared) high word
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi] ; next a operand, loaded between add and adc (flags unaffected)
+ adc ebp,edx
+ mov edx,DWORD [4+edi] ; next b operand
+ adc ebx,0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp] ; eax = r, reloaded for every store because mul overwrites eax
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [4+eax],ecx
+ mov eax,DWORD [8+esi]
+ ; saved r[1]
+ ; ################## Calculate word 2 (column sum: ebp low, ebx middle, ecx high)
+ xor ecx,ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [8+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp,ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [12+eax],ebx
+ mov eax,DWORD [16+esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx,ebx
+ ; mul a[4]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[0]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [16+eax],ecx
+ mov eax,DWORD [20+esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx,ecx
+ ; mul a[5]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[4]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [12+esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [16+edi]
+ adc ecx,0
+ ; mul a[1]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[0]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [20+eax],ebp
+ mov eax,DWORD [24+esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp,ebp
+ ; mul a[6]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[5]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [16+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[4]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [12+esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [16+edi]
+ adc ebp,0
+ ; mul a[2]*b[4]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [20+edi]
+ adc ebp,0
+ ; mul a[1]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[0]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [edi]
+ adc ebp,0
+ mov DWORD [24+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[6]
+ ; ################## Calculate word 7
+ xor ebx,ebx
+ ; mul a[7]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[6]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[5]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [16+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[4]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[3]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [20+edi]
+ adc ebx,0
+ ; mul a[2]*b[5]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [24+edi]
+ adc ebx,0
+ ; mul a[1]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[0]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [4+edi] ; from here on each column starts at a[7] and a higher b index
+ adc ebx,0
+ mov DWORD [28+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[7]
+ ; ################## Calculate word 8
+ xor ecx,ecx
+ ; mul a[7]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [24+esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[6]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[5]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [16+edi]
+ adc ecx,0
+ ; mul a[4]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [12+esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[3]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [24+edi]
+ adc ecx,0
+ ; mul a[2]*b[6]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [28+edi]
+ adc ecx,0
+ ; mul a[1]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ mov DWORD [32+eax],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[8]
+ ; ################## Calculate word 9
+ xor ebp,ebp
+ ; mul a[7]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [24+esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[6]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [16+edi]
+ adc ebp,0
+ ; mul a[5]*b[4]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [16+esi]
+ adc ecx,edx
+ mov edx,DWORD [20+edi]
+ adc ebp,0
+ ; mul a[4]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [12+esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[3]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [28+edi]
+ adc ebp,0
+ ; mul a[2]*b[7]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ mov DWORD [36+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[9]
+ ; ################## Calculate word 10
+ xor ebx,ebx
+ ; mul a[7]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ ; mul a[6]*b[4]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esi]
+ adc ebp,edx
+ mov edx,DWORD [20+edi]
+ adc ebx,0
+ ; mul a[5]*b[5]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [16+esi]
+ adc ebp,edx
+ mov edx,DWORD [24+edi]
+ adc ebx,0
+ ; mul a[4]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [12+esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[3]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [16+edi]
+ adc ebx,0
+ mov DWORD [40+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[10]
+ ; ################## Calculate word 11
+ xor ecx,ecx
+ ; mul a[7]*b[4]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [24+esi]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ ; mul a[6]*b[5]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esi]
+ adc ebx,edx
+ mov edx,DWORD [24+edi]
+ adc ecx,0
+ ; mul a[5]*b[6]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [16+esi]
+ adc ebx,edx
+ mov edx,DWORD [28+edi]
+ adc ecx,0
+ ; mul a[4]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [20+edi]
+ adc ecx,0
+ mov DWORD [44+eax],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[11]
+ ; ################## Calculate word 12
+ xor ebp,ebp
+ ; mul a[7]*b[5]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [24+esi]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ ; mul a[6]*b[6]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esi]
+ adc ecx,edx
+ mov edx,DWORD [28+edi]
+ adc ebp,0
+ ; mul a[5]*b[7]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [24+edi]
+ adc ebp,0
+ mov DWORD [48+eax],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[12]
+ ; ################## Calculate word 13
+ xor ebx,ebx
+ ; mul a[7]*b[6]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [24+esi]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ ; mul a[6]*b[7]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [28+edi]
+ adc ebx,0
+ mov DWORD [52+eax],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[13]
+ ; ################## Calculate word 14
+ xor ecx,ecx
+ ; mul a[7]*b[7]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ adc ecx,0 ; ecx is not read again after this point
+ mov DWORD [56+eax],ebp
+ ; saved r[14]
+ ; save r[15]: the middle word of the last column sum is the top result word
+ mov DWORD [60+eax],ebx
+ pop ebx ; restore the four saved registers in reverse push order
+ pop ebp
+ pop edi
+ pop esi
+ ret ; plain ret: the arguments are left on the stack for the caller
+global _bn_mul_comba4 ; 4x4-word Comba multiply: r[0..7] = a[0..3] * b[0..3] (32-bit words)
+align 16
+_bn_mul_comba4: ; stack arguments above the return address: 1st = r, 2nd = a, 3rd = b
+L$_bn_mul_comba4_begin:
+ push esi ; esi, edi, ebp and ebx are saved here and restored before ret
+ mov esi,DWORD [12+esp] ; esi = a (2nd argument; one register pushed so far)
+ push edi
+ mov edi,DWORD [20+esp] ; edi = b (3rd argument; two registers pushed so far)
+ push ebp
+ push ebx
+ xor ebx,ebx ; ebx, ecx and ebp form a three-word column accumulator whose roles rotate each column
+ mov eax,DWORD [esi] ; eax = a[0]
+ xor ecx,ecx
+ mov edx,DWORD [edi] ; edx = b[0]
+ ; ################## Calculate word 0 (column sum: ebx low, ecx middle, ebp high)
+ xor ebp,ebp
+ ; mul a[0]*b[0]
+ mul edx ; edx:eax = eax * edx
+ add ebx,eax
+ mov eax,DWORD [20+esp] ; eax = r (1st argument); mov does not touch the carry needed by the adc below
+ adc ecx,edx
+ mov edx,DWORD [edi] ; preload b[0] for the next column
+ adc ebp,0
+ mov DWORD [eax],ebx ; r[0] = low word of the column sum
+ mov eax,DWORD [4+esi] ; preload a[1] for the next column
+ ; saved r[0]
+ ; ################## Calculate word 1 (column sum: ecx low, ebp middle, ebx high)
+ xor ebx,ebx
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [esi]
+ adc ebp,edx
+ mov edx,DWORD [4+edi]
+ adc ebx,0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp] ; eax = r, reloaded for every store because mul overwrites eax
+ adc ebp,edx
+ mov edx,DWORD [edi]
+ adc ebx,0
+ mov DWORD [4+eax],ecx
+ mov eax,DWORD [8+esi]
+ ; saved r[1]
+ ; ################## Calculate word 2 (column sum: ebp low, ebx middle, ecx high)
+ xor ecx,ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [4+esi]
+ adc ebx,edx
+ mov edx,DWORD [4+edi]
+ adc ecx,0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [esi]
+ adc ebx,edx
+ mov edx,DWORD [8+edi]
+ adc ecx,0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [edi]
+ adc ecx,0
+ mov DWORD [8+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp,ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [8+esi]
+ adc ecx,edx
+ mov edx,DWORD [4+edi]
+ adc ebp,0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [4+esi]
+ adc ecx,edx
+ mov edx,DWORD [8+edi]
+ adc ebp,0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [esi]
+ adc ecx,edx
+ mov edx,DWORD [12+edi]
+ adc ebp,0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ mov edx,DWORD [4+edi] ; from here on each column starts at a[3] and a higher b index
+ adc ebp,0
+ mov DWORD [12+eax],ebx
+ mov eax,DWORD [12+esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx,ebx
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [8+esi]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [4+esi]
+ adc ebp,edx
+ mov edx,DWORD [12+edi]
+ adc ebx,0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx,eax
+ mov eax,DWORD [20+esp]
+ adc ebp,edx
+ mov edx,DWORD [8+edi]
+ adc ebx,0
+ mov DWORD [16+eax],ecx
+ mov eax,DWORD [12+esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx,ecx
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [8+esi]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp,eax
+ mov eax,DWORD [20+esp]
+ adc ebx,edx
+ mov edx,DWORD [12+edi]
+ adc ecx,0
+ mov DWORD [20+eax],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp,ebp
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx,eax
+ mov eax,DWORD [20+esp]
+ adc ecx,edx
+ adc ebp,0 ; ebp is not read again after this point
+ mov DWORD [24+eax],ebx
+ ; saved r[6]
+ ; save r[7]: the middle word of the last column sum is the top result word
+ mov DWORD [28+eax],ecx
+ pop ebx ; restore the four saved registers in reverse push order
+ pop ebp
+ pop edi
+ pop esi
+ ret ; plain ret: the arguments are left on the stack for the caller
+global _bn_sqr_comba8 ; 8-word Comba squaring: r[0..15] = a[0..7] * a[0..7] (32-bit words)
+align 16
+_bn_sqr_comba8: ; stack arguments above the return address: 1st = r, 2nd = a
+L$_bn_sqr_comba8_begin:
+ push esi ; esi, edi, ebp and ebx are saved here and restored before ret
+ push edi
+ push ebp
+ push ebx
+ mov edi,DWORD [20+esp] ; edi = r (1st argument; four registers pushed)
+ mov esi,DWORD [24+esp] ; esi = a (2nd argument)
+ xor ebx,ebx ; ebx, ecx and ebp form a three-word column accumulator whose roles rotate each column
+ xor ecx,ecx
+ mov eax,DWORD [esi] ; eax = a[0]
+ ; ############### Calculate word 0 (column sum: ebx low, ecx middle, ebp high)
+ xor ebp,ebp
+ ; sqr a[0]*a[0]
+ mul eax ; diagonal term: edx:eax = eax * eax, added once
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi] ; preload a[0] for the next column (mov leaves the carry intact)
+ adc ebp,0
+ mov DWORD [edi],ebx ; r[0] = low word of the column sum
+ mov eax,DWORD [4+esi] ; preload a[1] for the next column
+ ; saved r[0]
+ ; ############### Calculate word 1 (column sum: ecx low, ebp middle, ebx high)
+ xor ebx,ebx
+ ; sqr a[1]*a[0]
+ mul edx ; off-diagonal term: edx:eax = a[1] * a[0]
+ add eax,eax ; double the 64-bit product, since a[i]*a[j] and a[j]*a[i] both land in this column
+ adc edx,edx
+ adc ebx,0 ; bit shifted out of the doubled product goes into the high accumulator word
+ add ecx,eax ; then add the doubled product into the column sum
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [4+edi],ecx
+ mov edx,DWORD [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2 (column sum: ebp low, ebx middle, ecx high)
+ xor ecx,ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [4+esi]
+ adc ecx,0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [esi]
+ adc ecx,0
+ mov DWORD [8+edi],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp,ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [8+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [16+esi]
+ adc ebp,0
+ mov DWORD [12+edi],ebx
+ mov edx,DWORD [esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx,ebx
+ ; sqr a[4]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [12+esi]
+ adc ebx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [esi]
+ adc ebx,0
+ mov DWORD [16+edi],ecx
+ mov eax,DWORD [20+esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx,ecx
+ ; sqr a[5]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [16+esi]
+ adc ecx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[4]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [12+esi]
+ adc ecx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov DWORD [20+edi],ebp
+ mov edx,DWORD [esi]
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp,ebp
+ ; sqr a[6]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [20+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[5]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [16+esi]
+ adc ebp,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[4]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [12+esi]
+ adc ebp,0
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi]
+ adc ebp,0
+ mov DWORD [24+edi],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[6]
+ ; ############### Calculate word 7
+ xor ebx,ebx
+ ; sqr a[7]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [24+esi]
+ adc ebx,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[6]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [20+esi]
+ adc ebx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[5]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [16+esi]
+ adc ebx,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[4]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [28+esi]
+ adc ebx,0
+ mov DWORD [28+edi],ecx
+ mov edx,DWORD [4+esi]
+ ; saved r[7]
+ ; ############### Calculate word 8
+ xor ecx,ecx
+ ; sqr a[7]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov edx,DWORD [8+esi]
+ ; sqr a[6]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [20+esi]
+ adc ecx,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[5]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [16+esi]
+ adc ecx,0
+ ; sqr a[4]*a[4]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [8+esi]
+ adc ecx,0
+ mov DWORD [32+edi],ebp
+ mov eax,DWORD [28+esi]
+ ; saved r[8]
+ ; ############### Calculate word 9
+ xor ebp,ebp
+ ; sqr a[7]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [24+esi]
+ adc ebp,0
+ mov edx,DWORD [12+esi]
+ ; sqr a[6]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [20+esi]
+ adc ebp,0
+ mov edx,DWORD [16+esi]
+ ; sqr a[5]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [28+esi]
+ adc ebp,0
+ mov DWORD [36+edi],ebx
+ mov edx,DWORD [12+esi]
+ ; saved r[9]
+ ; ############### Calculate word 10
+ xor ebx,ebx
+ ; sqr a[7]*a[3]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [24+esi]
+ adc ebx,0
+ mov edx,DWORD [16+esi]
+ ; sqr a[6]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [20+esi]
+ adc ebx,0
+ ; sqr a[5]*a[5]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [16+esi]
+ adc ebx,0
+ mov DWORD [40+edi],ecx
+ mov eax,DWORD [28+esi]
+ ; saved r[10]
+ ; ############### Calculate word 11
+ xor ecx,ecx
+ ; sqr a[7]*a[4]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [24+esi]
+ adc ecx,0
+ mov edx,DWORD [20+esi]
+ ; sqr a[6]*a[5]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [28+esi]
+ adc ecx,0
+ mov DWORD [44+edi],ebp
+ mov edx,DWORD [20+esi]
+ ; saved r[11]
+ ; ############### Calculate word 12
+ xor ebp,ebp
+ ; sqr a[7]*a[5]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [24+esi]
+ adc ebp,0
+ ; sqr a[6]*a[6]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [24+esi]
+ adc ebp,0
+ mov DWORD [48+edi],ebx
+ mov eax,DWORD [28+esi]
+ ; saved r[12]
+ ; ############### Calculate word 13
+ xor ebx,ebx
+ ; sqr a[7]*a[6]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [28+esi]
+ adc ebx,0
+ mov DWORD [52+edi],ecx
+ ; saved r[13]
+ ; ############### Calculate word 14
+ xor ecx,ecx
+ ; sqr a[7]*a[7]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ adc ecx,0 ; ecx is not read again after this point
+ mov DWORD [56+edi],ebp
+ ; saved r[14]
+ mov DWORD [60+edi],ebx ; r[15] = middle word of the last column sum
+ pop ebx ; restore the four saved registers in reverse push order
+ pop ebp
+ pop edi
+ pop esi
+ ret ; plain ret: the arguments are left on the stack for the caller
+global _bn_sqr_comba4 ; 4-word Comba squaring: r[0..7] = a[0..3] * a[0..3] (32-bit words)
+align 16
+_bn_sqr_comba4: ; stack arguments above the return address: 1st = r, 2nd = a
+L$_bn_sqr_comba4_begin:
+ push esi ; esi, edi, ebp and ebx are saved here and restored before ret
+ push edi
+ push ebp
+ push ebx
+ mov edi,DWORD [20+esp] ; edi = r (1st argument; four registers pushed)
+ mov esi,DWORD [24+esp] ; esi = a (2nd argument)
+ xor ebx,ebx ; ebx, ecx and ebp form a three-word column accumulator whose roles rotate each column
+ xor ecx,ecx
+ mov eax,DWORD [esi] ; eax = a[0]
+ ; ############### Calculate word 0 (column sum: ebx low, ecx middle, ebp high)
+ xor ebp,ebp
+ ; sqr a[0]*a[0]
+ mul eax ; diagonal term: edx:eax = eax * eax, added once
+ add ebx,eax
+ adc ecx,edx
+ mov edx,DWORD [esi] ; preload a[0] for the next column (mov leaves the carry intact)
+ adc ebp,0
+ mov DWORD [edi],ebx ; r[0] = low word of the column sum
+ mov eax,DWORD [4+esi] ; preload a[1] for the next column
+ ; saved r[0]
+ ; ############### Calculate word 1 (column sum: ecx low, ebp middle, ebx high)
+ xor ebx,ebx
+ ; sqr a[1]*a[0]
+ mul edx ; off-diagonal term: edx:eax = a[1] * a[0]
+ add eax,eax ; double the 64-bit product, since a[i]*a[j] and a[j]*a[i] both land in this column
+ adc edx,edx
+ adc ebx,0 ; bit shifted out of the doubled product goes into the high accumulator word
+ add ecx,eax ; then add the doubled product into the column sum
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [4+edi],ecx
+ mov edx,DWORD [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2 (column sum: ebp low, ebx middle, ecx high)
+ xor ecx,ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [4+esi]
+ adc ecx,0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp,eax
+ adc ebx,edx
+ mov edx,DWORD [esi]
+ adc ecx,0
+ mov DWORD [8+edi],ebp
+ mov eax,DWORD [12+esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp,ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [8+esi]
+ adc ebp,0
+ mov edx,DWORD [4+esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebp,0
+ add ebx,eax
+ adc ecx,edx
+ mov eax,DWORD [12+esi]
+ adc ebp,0
+ mov DWORD [12+edi],ebx
+ mov edx,DWORD [4+esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx,ebx
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ebx,0
+ add ecx,eax
+ adc ebp,edx
+ mov eax,DWORD [8+esi]
+ adc ebx,0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx,eax
+ adc ebp,edx
+ mov edx,DWORD [8+esi]
+ adc ebx,0
+ mov DWORD [16+edi],ecx
+ mov eax,DWORD [12+esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx,ecx
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax,eax
+ adc edx,edx
+ adc ecx,0
+ add ebp,eax
+ adc ebx,edx
+ mov eax,DWORD [12+esi]
+ adc ecx,0
+ mov DWORD [20+edi],ebp
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp,ebp
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx,eax
+ adc ecx,edx
+ adc ebp,0 ; ebp is not read again after this point
+ mov DWORD [24+edi],ebx
+ ; saved r[6]
+ mov DWORD [28+edi],ecx ; r[7] = middle word of the last column sum
+ pop ebx ; restore the four saved registers in reverse push order
+ pop ebp
+ pop edi
+ pop esi
+ ret ; plain ret: the arguments are left on the stack for the caller
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm
new file mode 100644
index 0000000000..709f4a9e50
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm
@@ -0,0 +1,345 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+align 16
+__mul_1x1_mmx: ; file-local helper (not declared global): in eax = a, ebx = b; out mm0 = XOR-sum of shifted table entries (64-bit carry-less product of a and b)
+ sub esp,36 ; scratch area; [esp]..[28+esp] hold an 8-entry dword table indexed by 3 bits of b
+ mov ecx,eax
+ lea edx,[eax*1+eax] ; edx = 2*a
+ and ecx,1073741823 ; ecx = a1 = a AND 0x3FFFFFFF (top two bits of a handled separately via mm4/mm5)
+ lea ebp,[edx*1+edx] ; ebp = a4 = 4*a (truncated to 32 bits)
+ mov DWORD [esp],0 ; tab[0] = 0
+ and edx,2147483647 ; edx = a2 = (2*a) AND 0x7FFFFFFF
+ movd mm2,eax ; mm2 = a
+ movd mm3,ebx ; mm3 = b
+ mov DWORD [4+esp],ecx ; tab[1] = a1
+ xor ecx,edx ; ecx = a1^a2
+ pxor mm5,mm5
+ pxor mm4,mm4
+ mov DWORD [8+esp],edx ; tab[2] = a2
+ xor edx,ebp ; edx = a2^a4
+ mov DWORD [12+esp],ecx ; tab[3] = a1^a2
+ pcmpgtd mm5,mm2 ; mm5 low dword = all ones when bit 31 of a is set (0 > a as signed)
+ paddd mm2,mm2 ; mm2 = a shifted left by one, moving bit 30 into the sign position
+ xor ecx,edx ; ecx = a1^a4
+ mov DWORD [16+esp],ebp ; tab[4] = a4
+ xor ebp,edx ; ebp = a2
+ pand mm5,mm3 ; mm5 = b when bit 31 of a is set, else 0
+ pcmpgtd mm4,mm2 ; mm4 low dword = all ones when bit 30 of a is set
+ mov DWORD [20+esp],ecx ; tab[5] = a1^a4
+ xor ebp,ecx ; ebp = a1^a2^a4
+ psllq mm5,31 ; contribution of a's bit 31: b shifted left by 31
+ pand mm4,mm3 ; mm4 = b when bit 30 of a is set, else 0
+ mov DWORD [24+esp],edx ; tab[6] = a2^a4
+ mov esi,7
+ mov DWORD [28+esp],ebp ; tab[7] = a1^a2^a4
+ mov ebp,esi ; ebp = 7, kept as the 3-bit window mask
+ and esi,ebx ; esi = bits 0..2 of b
+ shr ebx,3 ; ebx is consumed three bits at a time
+ mov edi,ebp
+ psllq mm4,30 ; contribution of a's bit 30: b shifted left by 30
+ and edi,ebx ; edi = bits 3..5 of b
+ shr ebx,3
+ movd mm0,DWORD [esi*4+esp] ; mm0 = tab[window 0]; later windows are XORed in after shifting by 3, 6, ... 30
+ mov esi,ebp
+ and esi,ebx
+ shr ebx,3
+ movd mm2,DWORD [edi*4+esp]
+ mov edi,ebp
+ psllq mm2,3
+ and edi,ebx
+ shr ebx,3
+ pxor mm0,mm2
+ movd mm1,DWORD [esi*4+esp]
+ mov esi,ebp
+ psllq mm1,6
+ and esi,ebx
+ shr ebx,3
+ pxor mm0,mm1
+ movd mm2,DWORD [edi*4+esp]
+ mov edi,ebp
+ psllq mm2,9
+ and edi,ebx
+ shr ebx,3
+ pxor mm0,mm2
+ movd mm1,DWORD [esi*4+esp]
+ mov esi,ebp
+ psllq mm1,12
+ and esi,ebx
+ shr ebx,3
+ pxor mm0,mm1
+ movd mm2,DWORD [edi*4+esp]
+ mov edi,ebp
+ psllq mm2,15
+ and edi,ebx
+ shr ebx,3
+ pxor mm0,mm2
+ movd mm1,DWORD [esi*4+esp]
+ mov esi,ebp
+ psllq mm1,18
+ and esi,ebx
+ shr ebx,3
+ pxor mm0,mm1
+ movd mm2,DWORD [edi*4+esp]
+ mov edi,ebp
+ psllq mm2,21
+ and edi,ebx
+ shr ebx,3
+ pxor mm0,mm2
+ movd mm1,DWORD [esi*4+esp]
+ mov esi,ebp
+ psllq mm1,24
+ and esi,ebx
+ shr ebx,3
+ pxor mm0,mm1
+ movd mm2,DWORD [edi*4+esp]
+ pxor mm0,mm4 ; fold in the term for bit 30 of a
+ psllq mm2,27
+ pxor mm0,mm2
+ movd mm1,DWORD [esi*4+esp] ; last window: only the top two bits of b remain in esi
+ pxor mm0,mm5 ; fold in the term for bit 31 of a
+ psllq mm1,30
+ add esp,36 ; release the scratch area
+ pxor mm0,mm1
+ ret ; eax is unchanged; ebx, ecx, edx, ebp, esi, edi and mm1-mm5 have been overwritten
+align 16
+__mul_1x1_ialu: ; file-local helper (not declared global): in eax = a, ebx = b; out edx:eax = XOR-sum of shifted table entries (64-bit carry-less product of a and b)
+ sub esp,36 ; scratch area; [esp]..[28+esp] hold an 8-entry dword table indexed by 3 bits of b
+ mov ecx,eax
+ lea edx,[eax*1+eax] ; edx = 2*a
+ lea ebp,[eax*4] ; ebp = a4 = 4*a (truncated to 32 bits)
+ and ecx,1073741823 ; ecx = a1 = a AND 0x3FFFFFFF (top two bits of a handled separately below)
+ lea edi,[eax*1+eax] ; edi = 2*a, so bit 30 of a sits in the sign position
+ sar eax,31 ; eax = all ones when bit 31 of a is set, else 0
+ mov DWORD [esp],0 ; tab[0] = 0
+ and edx,2147483647 ; edx = a2 = (2*a) AND 0x7FFFFFFF
+ mov DWORD [4+esp],ecx ; tab[1] = a1
+ xor ecx,edx ; ecx = a1^a2
+ mov DWORD [8+esp],edx ; tab[2] = a2
+ xor edx,ebp ; edx = a2^a4
+ mov DWORD [12+esp],ecx ; tab[3] = a1^a2
+ xor ecx,edx ; ecx = a1^a4
+ mov DWORD [16+esp],ebp ; tab[4] = a4
+ xor ebp,edx ; ebp = a2
+ mov DWORD [20+esp],ecx ; tab[5] = a1^a4
+ xor ebp,ecx ; ebp = a1^a2^a4
+ sar edi,31 ; edi = all ones when bit 30 of a is set, else 0
+ and eax,ebx ; eax = b when bit 31 of a is set, else 0
+ mov DWORD [24+esp],edx ; tab[6] = a2^a4
+ and edi,ebx ; edi = b when bit 30 of a is set, else 0
+ mov DWORD [28+esp],ebp ; tab[7] = a1^a2^a4
+ mov edx,eax
+ shl eax,31 ; eax (low result word) starts as the low half of that term shifted left by 31
+ mov ecx,edi
+ shr edx,1 ; edx (high result word) starts as the high half of the same term
+ mov esi,7
+ shl edi,30 ; low half of the bit-30 term
+ and esi,ebx ; esi = bits 0..2 of b
+ shr ecx,2 ; high half of the bit-30 term
+ xor eax,edi
+ shr ebx,3 ; ebx is consumed three bits at a time
+ mov edi,7
+ and edi,ebx
+ shr ebx,3
+ xor edx,ecx
+ xor eax,DWORD [esi*4+esp] ; window 0 needs no shift, so it only affects the low word
+ mov esi,7
+ and esi,ebx
+ shr ebx,3
+ mov ebp,DWORD [edi*4+esp] ; each later window: shl k into eax, shr (32-k) into edx, k = 3, 6, ... 30
+ mov edi,7
+ mov ecx,ebp
+ shl ebp,3
+ and edi,ebx
+ shr ecx,29
+ xor eax,ebp
+ shr ebx,3
+ xor edx,ecx
+ mov ecx,DWORD [esi*4+esp]
+ mov esi,7
+ mov ebp,ecx
+ shl ecx,6
+ and esi,ebx
+ shr ebp,26
+ xor eax,ecx
+ shr ebx,3
+ xor edx,ebp
+ mov ebp,DWORD [edi*4+esp]
+ mov edi,7
+ mov ecx,ebp
+ shl ebp,9
+ and edi,ebx
+ shr ecx,23
+ xor eax,ebp
+ shr ebx,3
+ xor edx,ecx
+ mov ecx,DWORD [esi*4+esp]
+ mov esi,7
+ mov ebp,ecx
+ shl ecx,12
+ and esi,ebx
+ shr ebp,20
+ xor eax,ecx
+ shr ebx,3
+ xor edx,ebp
+ mov ebp,DWORD [edi*4+esp]
+ mov edi,7
+ mov ecx,ebp
+ shl ebp,15
+ and edi,ebx
+ shr ecx,17
+ xor eax,ebp
+ shr ebx,3
+ xor edx,ecx
+ mov ecx,DWORD [esi*4+esp]
+ mov esi,7
+ mov ebp,ecx
+ shl ecx,18
+ and esi,ebx
+ shr ebp,14
+ xor eax,ecx
+ shr ebx,3
+ xor edx,ebp
+ mov ebp,DWORD [edi*4+esp]
+ mov edi,7
+ mov ecx,ebp
+ shl ebp,21
+ and edi,ebx
+ shr ecx,11
+ xor eax,ebp
+ shr ebx,3
+ xor edx,ecx
+ mov ecx,DWORD [esi*4+esp]
+ mov esi,7
+ mov ebp,ecx
+ shl ecx,24
+ and esi,ebx ; esi = the two remaining top bits of b
+ shr ebp,8
+ xor eax,ecx
+ shr ebx,3
+ xor edx,ebp
+ mov ebp,DWORD [edi*4+esp]
+ mov ecx,ebp
+ shl ebp,27
+ mov edi,DWORD [esi*4+esp] ; table entry for the final window
+ shr ecx,5
+ mov esi,edi
+ xor eax,ebp
+ shl edi,30
+ xor edx,ecx
+ shr esi,2
+ xor eax,edi
+ xor edx,esi
+ add esp,36 ; release the scratch area
+ ret ; result in edx:eax; ebx, ecx, ebp, esi and edi have been overwritten
+global _bn_GF2m_mul_2x2 ; 1st stack arg = pointer to a 4-dword result; args 2,3 = high,low word of one operand; args 4,5 = high,low word of the other
+align 16
+_bn_GF2m_mul_2x2:
+L$_bn_GF2m_mul_2x2_begin:
+ lea edx,[_OPENSSL_ia32cap_P]
+ mov eax,DWORD [edx] ; eax = capability dword 0
+ mov edx,DWORD [4+edx] ; edx = capability dword 1
+ test eax,8388608 ; bit 23 of dword 0 (presumably the MMX feature flag; see the OPENSSL_ia32cap documentation)
+ jz NEAR L$000ialu ; bit clear: use the integer-only path
+ test eax,16777216 ; bit 24 of dword 0 (NOTE(review): meaning not shown in this file; confirm against OPENSSL_ia32cap)
+ jz NEAR L$001mmx
+ test edx,2 ; bit 1 of dword 1 (presumably the PCLMULQDQ feature flag; confirm)
+ jz NEAR L$001mmx
+ movups xmm0,[8+esp] ; load arguments 2..5 as four dwords
+ shufps xmm0,xmm0,177 ; swap the dwords within each 64-bit half so each half reads low word first
+db 102,15,58,68,192,1 ; bytes 66 0F 3A 44 C0 01 = pclmulqdq xmm0,xmm0,1 (high qword times low qword)
+ mov eax,DWORD [4+esp] ; eax = result pointer (1st argument)
+ movups [eax],xmm0 ; store the 128-bit product
+ ret
+align 16
+L$001mmx: ; MMX path: three 1x1 products combined Karatsuba-style with XOR
+ push ebp ; the helper overwrites ebp, ebx, esi and edi, so they are saved here
+ push ebx
+ push esi
+ push edi
+ mov eax,DWORD [24+esp] ; argument 2 (high word of first operand; four registers pushed)
+ mov ebx,DWORD [32+esp] ; argument 4 (high word of second operand)
+ call __mul_1x1_mmx
+ movq mm7,mm0 ; mm7 = high x high product
+ mov eax,DWORD [28+esp] ; argument 3 (low word of first operand)
+ mov ebx,DWORD [36+esp] ; argument 5 (low word of second operand)
+ call __mul_1x1_mmx
+ movq mm6,mm0 ; mm6 = low x low product
+ mov eax,DWORD [24+esp]
+ mov ebx,DWORD [32+esp]
+ xor eax,DWORD [28+esp] ; eax = high word XOR low word of the first operand
+ xor ebx,DWORD [36+esp] ; ebx = high word XOR low word of the second operand
+ call __mul_1x1_mmx
+ pxor mm0,mm7
+ mov eax,DWORD [20+esp] ; eax = result pointer (1st argument)
+ pxor mm0,mm6 ; mm0 = middle term = cross product XOR both outer products
+ movq mm2,mm0
+ psllq mm0,32 ; low half of the middle term moves into the upper dword of the low qword
+ pop edi ; register restores are interleaved with the remaining MMX work
+ psrlq mm2,32 ; high half of the middle term moves into the lower dword of the high qword
+ pop esi
+ pxor mm0,mm6
+ pop ebx
+ pxor mm2,mm7
+ movq [eax],mm0 ; result dwords 0..1
+ pop ebp
+ movq [8+eax],mm2 ; result dwords 2..3
+ emms ; leave MMX state before returning
+ ret
+align 16
+L$000ialu: ; integer-only path: same three-product scheme using __mul_1x1_ialu
+ push ebp
+ push ebx
+ push esi
+ push edi
+ sub esp,20 ; local storage for two 64-bit partial products (first 16 bytes used)
+ mov eax,DWORD [44+esp] ; argument 2 (36 bytes now sit between esp and the return address)
+ mov ebx,DWORD [52+esp] ; argument 4
+ call __mul_1x1_ialu
+ mov DWORD [8+esp],eax ; high x high product, low dword
+ mov DWORD [12+esp],edx ; high x high product, high dword
+ mov eax,DWORD [48+esp] ; argument 3
+ mov ebx,DWORD [56+esp] ; argument 5
+ call __mul_1x1_ialu
+ mov DWORD [esp],eax ; low x low product, low dword
+ mov DWORD [4+esp],edx ; low x low product, high dword
+ mov eax,DWORD [44+esp]
+ mov ebx,DWORD [52+esp]
+ xor eax,DWORD [48+esp]
+ xor ebx,DWORD [56+esp]
+ call __mul_1x1_ialu ; edx:eax = product of the XORed halves
+ mov ebp,DWORD [40+esp] ; ebp = result pointer (1st argument)
+ mov ebx,DWORD [esp]
+ mov ecx,DWORD [4+esp]
+ mov edi,DWORD [8+esp]
+ mov esi,DWORD [12+esp]
+ xor eax,edx
+ xor edx,ecx
+ xor eax,ebx
+ mov DWORD [ebp],ebx ; result dword 0 = low dword of the low x low product
+ xor edx,edi
+ mov DWORD [12+ebp],esi ; result dword 3 = high dword of the high x high product
+ xor eax,esi
+ add esp,20
+ xor edx,esi ; edx = result dword 2
+ pop edi
+ xor eax,edx ; eax = result dword 1 (terms common to both registers cancel)
+ pop esi
+ mov DWORD [8+ebp],edx
+ pop ebx
+ mov DWORD [4+ebp],eax ; ebp still holds the result pointer; it is restored by the next pop
+ pop ebp
+ ret
+db 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 ; ASCII attribution string, not code: "GF(2^m) Multipli"
+db 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 ; "cation for x86, "
+db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 ; "CRYPTOGAMS by " followed by the start of the author e-mail address
+db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 ; remainder of the e-mail address
+db 62,0 ; closing bracket and NUL terminator
+segment .bss
+common _OPENSSL_ia32cap_P 16
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm
new file mode 100644
index 0000000000..090630c3a0
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm
@@ -0,0 +1,479 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _bn_mul_mont ; exported entry; takes six stack arguments (read at 20..40+esp after the four pushes below)
+align 16
+_bn_mul_mont: ; Montgomery multiplication (see ident string after the code). Returns eax=1 when the product was computed, eax=0 when the word count is < 4.
+L$_bn_mul_mont_begin: ; Notation used in the comments below (argument names presumed from usage - confirm against the C prototype): rp=arg0, ap=arg1, bp=arg2, np=arg3, n0=first dword behind arg4, num=arg5, tp[]=scratch vector at 32+esp.
+ push ebp ; save ebp/ebx/esi/edi; popped again at L$000just_leave
+ push ebx
+ push esi
+ push edi
+ xor eax,eax ; eax = 0: return value for the early exit
+ mov edi,DWORD [40+esp] ; edi = num (sixth argument)
+ cmp edi,4
+ jl NEAR L$000just_leave ; num < 4 (signed compare): return 0 without touching anything
+ lea esi,[20+esp] ; esi -> first argument slot (argument block)
+ lea edx,[24+esp] ; edx = address of the second argument slot; only used as a reference address for the placement math below
+ add edi,2 ; edi = num+2
+ neg edi
+ lea ebp,[edi*4+esp-32] ; ebp = esp - 32 - 4*(num+2): first candidate for the new stack bottom
+ neg edi ; edi = num+2 again
+ mov eax,ebp
+ sub eax,edx
+ and eax,2047
+ sub ebp,eax ; ebp is now congruent to edx modulo 2048
+ xor edx,ebp
+ and edx,2048
+ xor edx,2048 ; edx = 2048 if bit 11 of ebp and of the reference address agree, else 0
+ sub ebp,edx ; after this, bit 11 of ebp differs from bit 11 of the reference address
+ and ebp,-64 ; round down to a 64-byte boundary
+ mov eax,esp
+ sub eax,ebp
+ and eax,-4096 ; eax = distance to the new bottom, rounded down to a multiple of 4096
+ mov edx,esp ; edx = original esp; stored at [24+esp] below and reloaded before returning
+ lea esp,[eax*1+ebp] ; first probe address: new bottom plus a whole number of 4096-byte steps
+ mov eax,DWORD [esp] ; read one dword at the probe address
+ cmp esp,ebp
+ ja NEAR L$001page_walk ; still above the target: keep stepping down
+ jmp NEAR L$002page_walk_done
+align 16
+L$001page_walk: ; lower esp 4096 bytes at a time, reading one dword at each step, until esp reaches ebp
+ lea esp,[esp-4096]
+ mov eax,DWORD [esp]
+ cmp esp,ebp
+ ja NEAR L$001page_walk
+L$002page_walk_done: ; esp == ebp here; frame layout: [4]=rp [8]=ap [12]=bp [16]=np [20]=n0 [24]=saved esp [28]=scratch [32..]=tp[]
+ mov eax,DWORD [esi] ; arg0: rp (destination written in the common tail)
+ mov ebx,DWORD [4+esi] ; arg1: ap
+ mov ecx,DWORD [8+esi] ; arg2: bp
+ mov ebp,DWORD [12+esi] ; arg3: np (the vector subtracted in L$016sub)
+ mov esi,DWORD [16+esi] ; arg4: a pointer...
+ mov esi,DWORD [esi] ; ...of which only the first dword is used: n0
+ mov DWORD [4+esp],eax ; keep copies of the arguments in the new frame
+ mov DWORD [8+esp],ebx
+ mov DWORD [12+esp],ecx
+ mov DWORD [16+esp],ebp
+ mov DWORD [20+esp],esi
+ lea ebx,[edi-3] ; ebx = (num+2)-3 = num-1
+ mov DWORD [24+esp],edx ; save the original esp
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26 ; test bit 26 of the first capability dword (SSE2, going by the label name below)
+ jnc NEAR L$003non_sse2 ; bit clear: use the integer-only code
+ mov eax,-1
+ movd mm7,eax ; mm7 = 0x00000000FFFFFFFF: mask selecting the low 32 bits
+ mov esi,DWORD [8+esp] ; esi = ap
+ mov edi,DWORD [12+esp] ; edi = bp
+ mov ebp,DWORD [16+esp] ; ebp = np
+ xor edx,edx ; edx = i = 0 (index into bp)
+ xor ecx,ecx ; ecx = j = 0 (index into ap/np)
+ movd mm4,DWORD [edi] ; mm4 = bp[0]
+ movd mm5,DWORD [esi] ; mm5 = ap[0]
+ movd mm3,DWORD [ebp] ; mm3 = np[0]
+ pmuludq mm5,mm4 ; mm5 = ap[0]*bp[0] (64-bit product)
+ movq mm2,mm5 ; mm2 = carry chain for the ap*bp products
+ movq mm0,mm5
+ pand mm0,mm7 ; mm0 = low word of ap[0]*bp[0]
+ pmuludq mm5,[20+esp] ; mm5 = low word * n0; pmuludq uses only the low dword of each operand, so the dword after n0 is ignored. Low 32 bits of mm5 = m for this pass
+ pmuludq mm3,mm5 ; mm3 = np[0]*m
+ paddq mm3,mm0 ; add the low word; only the carry out of this sum is kept
+ movd mm1,DWORD [4+ebp] ; preload np[1]
+ movd mm0,DWORD [4+esi] ; preload ap[1]
+ psrlq mm2,32 ; keep the high words as carries
+ psrlq mm3,32
+ inc ecx ; j = 1
+align 16
+L$0041st: ; first pass (i = 0): tp = (ap*bp[0] + np*m) shifted down by one word
+ pmuludq mm0,mm4 ; ap[j]*bp[0]
+ pmuludq mm1,mm5 ; np[j]*m
+ paddq mm2,mm0 ; add to the ap*bp carry chain
+ paddq mm3,mm1 ; add to the np*m carry chain
+ movq mm0,mm2
+ pand mm0,mm7 ; low word of the ap*bp chain
+ movd mm1,DWORD [4+ecx*4+ebp] ; preload np[j+1]
+ paddq mm3,mm0 ; fold it into the np*m chain
+ movd mm0,DWORD [4+ecx*4+esi] ; preload ap[j+1]
+ psrlq mm2,32
+ movd DWORD [28+ecx*4+esp],mm3 ; tp[j-1] = low word of the combined sum
+ psrlq mm3,32
+ lea ecx,[1+ecx] ; j++
+ cmp ecx,ebx
+ jl NEAR L$0041st ; loop while j < num-1
+ pmuludq mm0,mm4 ; last word: ap[num-1]*bp[0]
+ pmuludq mm1,mm5 ; np[num-1]*m
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ pand mm0,mm7
+ paddq mm3,mm0
+ movd DWORD [28+ecx*4+esp],mm3 ; tp[num-2]
+ psrlq mm2,32
+ psrlq mm3,32
+ paddq mm3,mm2 ; merge the two carry chains
+ movq [32+ebx*4+esp],mm3 ; 64-bit store: tp[num-1] and tp[num]
+ inc edx ; i = 1
+L$005outer: ; passes i = 1..num-1: tp = (tp + ap*bp[i] + np*m) shifted down by one word
+ xor ecx,ecx ; j = 0
+ movd mm4,DWORD [edx*4+edi] ; mm4 = bp[i]
+ movd mm5,DWORD [esi] ; mm5 = ap[0]
+ movd mm6,DWORD [32+esp] ; mm6 = tp[0]
+ movd mm3,DWORD [ebp] ; mm3 = np[0]
+ pmuludq mm5,mm4 ; ap[0]*bp[i]
+ paddq mm5,mm6 ; + tp[0]
+ movq mm0,mm5
+ movq mm2,mm5 ; mm2 = ap*bp carry chain
+ pand mm0,mm7 ; mm0 = low word
+ pmuludq mm5,[20+esp] ; low 32 bits of mm5 = m = low word * n0
+ pmuludq mm3,mm5 ; np[0]*m
+ paddq mm3,mm0 ; only the carry out of this sum is kept
+ movd mm6,DWORD [36+esp] ; mm6 = tp[1]
+ movd mm1,DWORD [4+ebp] ; preload np[1]
+ movd mm0,DWORD [4+esi] ; preload ap[1]
+ psrlq mm2,32
+ psrlq mm3,32
+ paddq mm2,mm6 ; carry += tp[1]
+ inc ecx ; j = 1
+ dec ebx ; ebx = num-2: trip count for the inner loop (rebuilt from ecx afterwards)
+L$006inner:
+ pmuludq mm0,mm4 ; ap[j]*bp[i]
+ pmuludq mm1,mm5 ; np[j]*m
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ movd mm6,DWORD [36+ecx*4+esp] ; mm6 = tp[j+1]
+ pand mm0,mm7
+ movd mm1,DWORD [4+ecx*4+ebp] ; preload np[j+1]
+ paddq mm3,mm0
+ movd mm0,DWORD [4+ecx*4+esi] ; preload ap[j+1]
+ psrlq mm2,32
+ movd DWORD [28+ecx*4+esp],mm3 ; tp[j-1] = low word
+ psrlq mm3,32
+ paddq mm2,mm6 ; carry += tp[j+1]
+ dec ebx ; sets ZF for the jnz below (the lea in between leaves flags alone)
+ lea ecx,[1+ecx] ; j++
+ jnz NEAR L$006inner
+ mov ebx,ecx ; ebx = num-1 again
+ pmuludq mm0,mm4 ; last word: ap[num-1]*bp[i]
+ pmuludq mm1,mm5 ; np[num-1]*m
+ paddq mm2,mm0
+ paddq mm3,mm1
+ movq mm0,mm2
+ pand mm0,mm7
+ paddq mm3,mm0
+ movd DWORD [28+ecx*4+esp],mm3 ; tp[num-2]
+ psrlq mm2,32
+ psrlq mm3,32
+ movd mm6,DWORD [36+ebx*4+esp] ; mm6 = tp[num]
+ paddq mm3,mm2 ; merge the two carry chains
+ paddq mm3,mm6 ; + previous top word
+ movq [32+ebx*4+esp],mm3 ; 64-bit store: tp[num-1] and tp[num]
+ lea edx,[1+edx] ; i++
+ cmp edx,ebx
+ jle NEAR L$005outer ; loop while i <= num-1
+ emms ; clear MMX state before returning to integer code
+ jmp NEAR L$007common_tail
+align 16
+L$003non_sse2: ; integer-only code; picks between the general multiply and a squaring variant
+ mov esi,DWORD [8+esp] ; esi = ap
+ lea ebp,[1+ebx] ; ebp = num
+ mov edi,DWORD [12+esp] ; edi = bp
+ xor ecx,ecx ; j = 0
+ mov edx,esi
+ and ebp,1 ; ebp = num & 1
+ sub edx,edi ; edx = ap - bp (zero when both operands are the same buffer)
+ lea eax,[4+ebx*4+edi] ; eax = &bp[num]
+ or ebp,edx ; ZF set only when num is even and ap == bp
+ mov edi,DWORD [edi] ; edi = bp[0] (mov does not disturb ZF)
+ jz NEAR L$008bn_sqr_mont ; squaring with even num: take the dedicated path
+ mov DWORD [28+esp],eax ; remember &bp[num] as the end marker for the outer loop
+ mov eax,DWORD [esi] ; eax = ap[0]
+ xor edx,edx ; carry = 0
+align 16
+L$009mull: ; tp = ap * bp[0]
+ mov ebp,edx ; ebp = carry in
+ mul edi ; edx:eax = ap[j]*bp[0]
+ add ebp,eax
+ lea ecx,[1+ecx] ; j++ (flags untouched)
+ adc edx,0 ; carry out = high word + CF
+ mov eax,DWORD [ecx*4+esi] ; next ap word
+ cmp ecx,ebx
+ mov DWORD [28+ecx*4+esp],ebp ; store the low word into tp[j-1] (j already incremented)
+ jl NEAR L$009mull ; loop while j < num-1
+ mov ebp,edx
+ mul edi ; ap[num-1]*bp[0]
+ mov edi,DWORD [20+esp] ; edi = n0
+ add eax,ebp
+ mov esi,DWORD [16+esp] ; esi = np
+ adc edx,0
+ imul edi,DWORD [32+esp] ; edi = m = n0*tp[0] (low 32 bits)
+ mov DWORD [32+ebx*4+esp],eax ; tp[num-1]
+ xor ecx,ecx
+ mov DWORD [36+ebx*4+esp],edx ; tp[num]
+ mov DWORD [40+ebx*4+esp],ecx ; tp[num+1] = 0
+ mov eax,DWORD [esi] ; eax = np[0]
+ mul edi ; np[0]*m
+ add eax,DWORD [32+esp] ; + tp[0]; the sum itself is discarded, only CF is used
+ mov eax,DWORD [4+esi] ; eax = np[1] (mov keeps CF)
+ adc edx,0
+ inc ecx ; j = 1
+ jmp NEAR L$0102ndmadd
+align 16
+L$0111stmadd: ; tp += ap * bp[i]
+ mov ebp,edx ; ebp = carry in
+ mul edi ; ap[j]*bp[i]
+ add ebp,DWORD [32+ecx*4+esp] ; + tp[j]
+ lea ecx,[1+ecx] ; j++ (flags untouched)
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi] ; next ap word
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [28+ecx*4+esp],ebp ; store back into the tp word just read
+ jl NEAR L$0111stmadd
+ mov ebp,edx
+ mul edi ; ap[num-1]*bp[i]
+ add eax,DWORD [32+ebx*4+esp] ; + tp[num-1]
+ mov edi,DWORD [20+esp] ; edi = n0
+ adc edx,0
+ mov esi,DWORD [16+esp] ; esi = np
+ add ebp,eax
+ adc edx,0
+ imul edi,DWORD [32+esp] ; edi = m = n0*tp[0]
+ xor ecx,ecx
+ add edx,DWORD [36+ebx*4+esp] ; + tp[num]
+ mov DWORD [32+ebx*4+esp],ebp ; tp[num-1]
+ adc ecx,0 ; ecx = carry out of the top word
+ mov eax,DWORD [esi] ; eax = np[0]
+ mov DWORD [36+ebx*4+esp],edx ; tp[num]
+ mov DWORD [40+ebx*4+esp],ecx ; tp[num+1] = carry
+ mul edi ; np[0]*m
+ add eax,DWORD [32+esp] ; + tp[0]; only CF is used
+ mov eax,DWORD [4+esi] ; eax = np[1]
+ adc edx,0
+ mov ecx,1 ; j = 1
+align 16
+L$0102ndmadd: ; tp = (tp + np*m) shifted down by one word
+ mov ebp,edx ; ebp = carry in
+ mul edi ; np[j]*m
+ add ebp,DWORD [32+ecx*4+esp] ; + tp[j]
+ lea ecx,[1+ecx] ; j++ (flags untouched)
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi] ; next np word
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [24+ecx*4+esp],ebp ; store one word below the one read (j already incremented)
+ jl NEAR L$0102ndmadd
+ mov ebp,edx
+ mul edi ; np[num-1]*m
+ add ebp,DWORD [32+ebx*4+esp] ; + tp[num-1]
+ adc edx,0
+ add ebp,eax
+ adc edx,0
+ mov DWORD [28+ebx*4+esp],ebp ; tp[num-2]
+ xor eax,eax
+ mov ecx,DWORD [12+esp] ; ecx = current position in bp
+ add edx,DWORD [36+ebx*4+esp] ; + tp[num]
+ adc eax,DWORD [40+ebx*4+esp] ; eax = tp[num+1] + carry
+ lea ecx,[4+ecx] ; advance to the next bp word
+ mov DWORD [32+ebx*4+esp],edx ; tp[num-1]
+ cmp ecx,DWORD [28+esp] ; reached &bp[num]?
+ mov DWORD [36+ebx*4+esp],eax ; tp[num]
+ je NEAR L$007common_tail ; all bp words consumed
+ mov edi,DWORD [ecx] ; edi = next bp word
+ mov esi,DWORD [8+esp] ; esi = ap
+ mov DWORD [12+esp],ecx ; save the bp position
+ xor ecx,ecx ; j = 0
+ xor edx,edx ; carry = 0
+ mov eax,DWORD [esi] ; eax = ap[0]
+ jmp NEAR L$0111stmadd
+align 16
+L$008bn_sqr_mont: ; squaring path (ap == bp, num even); entered with ecx = 0, edi = ap[0], esi = ap, ebx = num-1
+ mov DWORD [esp],ebx ; [esp] = num-1
+ mov DWORD [12+esp],ecx ; [12+esp] is reused as the outer index i, starting at 0
+ mov eax,edi
+ mul edi ; ap[0]*ap[0]
+ mov DWORD [32+esp],eax ; tp[0] = low word
+ mov ebx,edx
+ shr edx,1 ; halve the high word; the loop below doubles everything again
+ and ebx,1 ; ebx = the bit shifted out
+ inc ecx ; j = 1
+align 16
+L$012sqr: ; tp[j] = 2*ap[j]*ap[0] + carries
+ mov eax,DWORD [ecx*4+esi] ; eax = ap[j]
+ mov ebp,edx ; ebp = carry in
+ mul edi ; ap[j]*ap[0]
+ add eax,ebp
+ lea ecx,[1+ecx] ; j++ (flags untouched)
+ adc edx,0
+ lea ebp,[eax*2+ebx] ; double the low word and bring in the bit carried from the previous word
+ shr eax,31 ; top bit pushed out by the doubling
+ cmp ecx,DWORD [esp]
+ mov ebx,eax ; carry that bit into the next word
+ mov DWORD [28+ecx*4+esp],ebp ; store into tp[j] (j already incremented)
+ jl NEAR L$012sqr ; loop while j < num-1
+ mov eax,DWORD [ecx*4+esi] ; eax = ap[num-1]
+ mov ebp,edx
+ mul edi ; ap[num-1]*ap[0]
+ add eax,ebp
+ mov edi,DWORD [20+esp] ; edi = n0
+ adc edx,0
+ mov esi,DWORD [16+esp] ; esi = np
+ lea ebp,[eax*2+ebx] ; doubled low word plus carried bit
+ imul edi,DWORD [32+esp] ; edi = m = n0*tp[0]
+ shr eax,31
+ mov DWORD [32+ecx*4+esp],ebp ; tp[num-1]
+ lea ebp,[edx*2+eax] ; doubled high word plus carried bit
+ mov eax,DWORD [esi] ; eax = np[0]
+ shr edx,31 ; bit pushed out of the high word
+ mov DWORD [36+ecx*4+esp],ebp ; tp[num]
+ mov DWORD [40+ecx*4+esp],edx ; tp[num+1]
+ mul edi ; np[0]*m
+ add eax,DWORD [32+esp] ; + tp[0]; only CF is used
+ mov ebx,ecx ; ebx = num-1
+ adc edx,0
+ mov eax,DWORD [4+esi] ; eax = np[1]
+ mov ecx,1 ; j = 1
+align 16
+L$0133rdmadd: ; tp = (tp + np*m) shifted down by one word, two words per iteration
+ mov ebp,edx ; ebp = carry in
+ mul edi ; np[j]*m
+ add ebp,DWORD [32+ecx*4+esp] ; + tp[j]
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [4+ecx*4+esi] ; eax = np[j+1]
+ adc edx,0
+ mov DWORD [28+ecx*4+esp],ebp ; tp[j-1]
+ mov ebp,edx
+ mul edi ; np[j+1]*m
+ add ebp,DWORD [36+ecx*4+esp] ; + tp[j+1]
+ lea ecx,[2+ecx] ; j += 2 (flags untouched)
+ adc edx,0
+ add ebp,eax
+ mov eax,DWORD [ecx*4+esi] ; next np word
+ adc edx,0
+ cmp ecx,ebx
+ mov DWORD [24+ecx*4+esp],ebp ; store two words below the new j
+ jl NEAR L$0133rdmadd
+ mov ebp,edx
+ mul edi ; np[num-1]*m
+ add ebp,DWORD [32+ebx*4+esp] ; + tp[num-1]
+ adc edx,0
+ add ebp,eax
+ adc edx,0
+ mov DWORD [28+ebx*4+esp],ebp ; tp[num-2]
+ mov ecx,DWORD [12+esp] ; ecx = i
+ xor eax,eax
+ mov esi,DWORD [8+esp] ; esi = ap
+ add edx,DWORD [36+ebx*4+esp] ; + tp[num]
+ adc eax,DWORD [40+ebx*4+esp] ; eax = tp[num+1] + carry
+ mov DWORD [32+ebx*4+esp],edx ; tp[num-1]
+ cmp ecx,ebx ; was that the pass for i == num-1?
+ mov DWORD [36+ebx*4+esp],eax ; tp[num]
+ je NEAR L$007common_tail ; yes: all passes done
+ mov edi,DWORD [4+ecx*4+esi] ; edi = ap[i+1]
+ lea ecx,[1+ecx] ; i++
+ mov eax,edi
+ mov DWORD [12+esp],ecx ; save i
+ mul edi ; ap[i]*ap[i]
+ add eax,DWORD [32+ecx*4+esp] ; + tp[i]
+ adc edx,0
+ mov DWORD [32+ecx*4+esp],eax ; tp[i]
+ xor ebp,ebp
+ cmp ecx,ebx
+ lea ecx,[1+ecx] ; j = i+1 (flags from the cmp survive the lea)
+ je NEAR L$014sqrlast ; i == num-1: no cross products left
+ mov ebx,edx
+ shr edx,1 ; halve the carry; the loop below doubles it again
+ and ebx,1 ; ebx = the bit shifted out
+align 16
+L$015sqradd: ; tp[j] += 2*ap[j]*ap[i] for j = i+1..num-1
+ mov eax,DWORD [ecx*4+esi] ; eax = ap[j]
+ mov ebp,edx ; ebp = carry in
+ mul edi ; ap[j]*ap[i]
+ add eax,ebp
+ lea ebp,[eax*1+eax] ; ebp = 2*low word (lea keeps CF from the add)
+ adc edx,0
+ shr eax,31 ; top bit pushed out by the doubling
+ add ebp,DWORD [32+ecx*4+esp] ; + tp[j]
+ lea ecx,[1+ecx] ; j++ (flags untouched)
+ adc eax,0
+ add ebp,ebx ; + bit carried from the previous word
+ adc eax,0
+ cmp ecx,DWORD [esp]
+ mov DWORD [28+ecx*4+esp],ebp ; store back into the tp word just read
+ mov ebx,eax ; carry for the next word
+ jle NEAR L$015sqradd ; loop while j <= num-1
+ mov ebp,edx
+ add edx,edx ; double the final high word
+ shr ebp,31 ; bit pushed out by that doubling
+ add edx,ebx ; + carry from the last word
+ adc ebp,0
+L$014sqrlast: ; fold edx (low) and ebp (high) into the top of tp, then start the next reduction pass
+ mov edi,DWORD [20+esp] ; edi = n0
+ mov esi,DWORD [16+esp] ; esi = np
+ imul edi,DWORD [32+esp] ; edi = m = n0*tp[0]
+ add edx,DWORD [32+ecx*4+esp] ; + tp[num] (ecx == num here)
+ mov eax,DWORD [esi] ; eax = np[0]
+ adc ebp,0
+ mov DWORD [32+ecx*4+esp],edx ; tp[num]
+ mov DWORD [36+ecx*4+esp],ebp ; tp[num+1]
+ mul edi ; np[0]*m
+ add eax,DWORD [32+esp] ; + tp[0]; only CF is used
+ lea ebx,[ecx-1] ; ebx = num-1
+ adc edx,0
+ mov ecx,1 ; j = 1
+ mov eax,DWORD [4+esi] ; eax = np[1]
+ jmp NEAR L$0133rdmadd
+align 16
+L$007common_tail: ; shared by all paths: rp = tp - np, then pick tp or tp-np per word using masks (no branch on the data)
+ mov ebp,DWORD [16+esp] ; ebp = np
+ mov edi,DWORD [4+esp] ; edi = rp
+ lea esi,[32+esp] ; esi = tp
+ mov eax,DWORD [esi] ; eax = tp[0]
+ mov ecx,ebx ; ecx = num-1: loop counter
+ xor edx,edx ; index = 0; also clears CF for the first sbb
+align 16
+L$016sub:
+ sbb eax,DWORD [edx*4+ebp] ; tp[k] - np[k] - borrow
+ mov DWORD [edx*4+edi],eax ; rp[k] = difference
+ dec ecx ; dec leaves CF (the borrow) untouched; SF/OF drive the jge below
+ mov eax,DWORD [4+edx*4+esi] ; eax = tp[k+1]
+ lea edx,[1+edx] ; k++ (flags untouched)
+ jge NEAR L$016sub ; num iterations in total
+ sbb eax,0 ; eax = tp[num] - final borrow; used as an AND mask below
+ mov edx,-1
+ xor edx,eax ; edx = NOT eax: the complementary mask
+ jmp NEAR L$017copy ; skip the alignment padding
+align 16
+L$017copy: ; for k = num-1 down to 0: rp[k] = (tp[k] AND eax) OR (rp[k] AND edx)
+ mov esi,DWORD [32+ebx*4+esp] ; esi = tp[k]
+ mov ebp,DWORD [ebx*4+edi] ; ebp = rp[k] (the difference stored above)
+ mov DWORD [32+ebx*4+esp],ecx ; overwrite the scratch word with ecx (-1 after the loop above)
+ and esi,eax
+ and ebp,edx
+ or ebp,esi
+ mov DWORD [ebx*4+edi],ebp ; final rp[k]
+ dec ebx
+ jge NEAR L$017copy
+ mov esp,DWORD [24+esp] ; restore the esp saved at entry (points at the four pushed registers)
+ mov eax,1 ; return 1: result written to rp
+L$000just_leave: ; common exit; eax is 0 when reached directly from the num < 4 check
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+db 111,114,103,62,0
+segment .bss
+common _OPENSSL_ia32cap_P 16