summaryrefslogtreecommitdiff
path: root/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm')
-rw-r--r--deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm1515
1 files changed, 1515 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm
new file mode 100644
index 0000000000..82002b353b
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm
@@ -0,0 +1,1515 @@
+%ifidn __OUTPUT_FORMAT__,obj
+section code use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text code align=64
+%else
+section .text code
+%endif
+;extern _OPENSSL_ia32cap_P
+global _bn_mul_add_words
+align 16
+_bn_mul_add_words:
+L$_bn_mul_add_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$000maw_non_sse2
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ movd mm0,DWORD [16+esp]
+ pxor mm1,mm1
+ jmp NEAR L$001maw_sse2_entry
+align 16
+L$002maw_sse2_unrolled:
+ movd mm3,DWORD [eax]
+ paddq mm1,mm3
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0
+ movd mm4,DWORD [4+edx]
+ pmuludq mm4,mm0
+ movd mm6,DWORD [8+edx]
+ pmuludq mm6,mm0
+ movd mm7,DWORD [12+edx]
+ pmuludq mm7,mm0
+ paddq mm1,mm2
+ movd mm3,DWORD [4+eax]
+ paddq mm3,mm4
+ movd mm5,DWORD [8+eax]
+ paddq mm5,mm6
+ movd mm4,DWORD [12+eax]
+ paddq mm7,mm4
+ movd DWORD [eax],mm1
+ movd mm2,DWORD [16+edx]
+ pmuludq mm2,mm0
+ psrlq mm1,32
+ movd mm4,DWORD [20+edx]
+ pmuludq mm4,mm0
+ paddq mm1,mm3
+ movd mm6,DWORD [24+edx]
+ pmuludq mm6,mm0
+ movd DWORD [4+eax],mm1
+ psrlq mm1,32
+ movd mm3,DWORD [28+edx]
+ add edx,32
+ pmuludq mm3,mm0
+ paddq mm1,mm5
+ movd mm5,DWORD [16+eax]
+ paddq mm2,mm5
+ movd DWORD [8+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm7
+ movd mm5,DWORD [20+eax]
+ paddq mm4,mm5
+ movd DWORD [12+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm2
+ movd mm5,DWORD [24+eax]
+ paddq mm6,mm5
+ movd DWORD [16+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm4
+ movd mm5,DWORD [28+eax]
+ paddq mm3,mm5
+ movd DWORD [20+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm6
+ movd DWORD [24+eax],mm1
+ psrlq mm1,32
+ paddq mm1,mm3
+ movd DWORD [28+eax],mm1
+ lea eax,[32+eax]
+ psrlq mm1,32
+ sub ecx,8
+ jz NEAR L$003maw_sse2_exit
+L$001maw_sse2_entry:
+ test ecx,4294967288
+ jnz NEAR L$002maw_sse2_unrolled
+align 4
+L$004maw_sse2_loop:
+ movd mm2,DWORD [edx]
+ movd mm3,DWORD [eax]
+ pmuludq mm2,mm0
+ lea edx,[4+edx]
+ paddq mm1,mm3
+ paddq mm1,mm2
+ movd DWORD [eax],mm1
+ sub ecx,1
+ psrlq mm1,32
+ lea eax,[4+eax]
+ jnz NEAR L$004maw_sse2_loop
+L$003maw_sse2_exit:
+ movd eax,mm1
+ emms
+ ret
+align 16
+L$000maw_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi
+ mov edi,DWORD [20+esp]
+ mov ecx,DWORD [28+esp]
+ mov ebx,DWORD [24+esp]
+ and ecx,4294967288
+ mov ebp,DWORD [32+esp]
+ push ecx
+ jz NEAR L$005maw_finish
+align 16
+L$006maw_loop:
+ ; Round 0
+ mov eax,DWORD [ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [edi]
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [28+edi]
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ sub ecx,8
+ lea ebx,[32+ebx]
+ lea edi,[32+edi]
+ jnz NEAR L$006maw_loop
+L$005maw_finish:
+ mov ecx,DWORD [32+esp]
+ and ecx,7
+ jnz NEAR L$007maw_finish2
+ jmp NEAR L$008maw_end
+L$007maw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [4+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [8+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [12+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [16+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [20+edi]
+ adc edx,0
+ dec ecx
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ jz NEAR L$008maw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+ebx]
+ mul ebp
+ add eax,esi
+ adc edx,0
+ add eax,DWORD [24+edi]
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$008maw_end:
+ mov eax,esi
+ pop ecx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_mul_words
+align 16
+_bn_mul_words:
+L$_bn_mul_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$009mw_non_sse2
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ movd mm0,DWORD [16+esp]
+ pxor mm1,mm1
+align 16
+L$010mw_sse2_loop:
+ movd mm2,DWORD [edx]
+ pmuludq mm2,mm0
+ lea edx,[4+edx]
+ paddq mm1,mm2
+ movd DWORD [eax],mm1
+ sub ecx,1
+ psrlq mm1,32
+ lea eax,[4+eax]
+ jnz NEAR L$010mw_sse2_loop
+ movd eax,mm1
+ emms
+ ret
+align 16
+L$009mw_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ xor esi,esi
+ mov edi,DWORD [20+esp]
+ mov ebx,DWORD [24+esp]
+ mov ebp,DWORD [28+esp]
+ mov ecx,DWORD [32+esp]
+ and ebp,4294967288
+ jz NEAR L$011mw_finish
+L$012mw_loop:
+ ; Round 0
+ mov eax,DWORD [ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ ; Round 4
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ ; Round 8
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ ; Round 12
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ ; Round 16
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ ; Round 20
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ ; Round 24
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+ ; Round 28
+ mov eax,DWORD [28+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [28+edi],eax
+ mov esi,edx
+ ;
+ add ebx,32
+ add edi,32
+ sub ebp,8
+ jz NEAR L$011mw_finish
+ jmp NEAR L$012mw_loop
+L$011mw_finish:
+ mov ebp,DWORD [28+esp]
+ and ebp,7
+ jnz NEAR L$013mw_finish2
+ jmp NEAR L$014mw_end
+L$013mw_finish2:
+ ; Tail Round 0
+ mov eax,DWORD [ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [4+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [8+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [12+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [16+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [20+edi],eax
+ mov esi,edx
+ dec ebp
+ jz NEAR L$014mw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+ebx]
+ mul ecx
+ add eax,esi
+ adc edx,0
+ mov DWORD [24+edi],eax
+ mov esi,edx
+L$014mw_end:
+ mov eax,esi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sqr_words
+align 16
+_bn_sqr_words:
+L$_bn_sqr_words_begin:
+ lea eax,[_OPENSSL_ia32cap_P]
+ bt DWORD [eax],26
+ jnc NEAR L$015sqr_non_sse2
+ mov eax,DWORD [4+esp]
+ mov edx,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+align 16
+L$016sqr_sse2_loop:
+ movd mm0,DWORD [edx]
+ pmuludq mm0,mm0
+ lea edx,[4+edx]
+ movq [eax],mm0
+ sub ecx,1
+ lea eax,[8+eax]
+ jnz NEAR L$016sqr_sse2_loop
+ emms
+ ret
+align 16
+L$015sqr_non_sse2:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov esi,DWORD [20+esp]
+ mov edi,DWORD [24+esp]
+ mov ebx,DWORD [28+esp]
+ and ebx,4294967288
+ jz NEAR L$017sw_finish
+L$018sw_loop:
+ ; Round 0
+ mov eax,DWORD [edi]
+ mul eax
+ mov DWORD [esi],eax
+ mov DWORD [4+esi],edx
+ ; Round 4
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ mov DWORD [12+esi],edx
+ ; Round 8
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ mov DWORD [20+esi],edx
+ ; Round 12
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ mov DWORD [28+esi],edx
+ ; Round 16
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ mov DWORD [36+esi],edx
+ ; Round 20
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ mov DWORD [44+esi],edx
+ ; Round 24
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+ ; Round 28
+ mov eax,DWORD [28+edi]
+ mul eax
+ mov DWORD [56+esi],eax
+ mov DWORD [60+esi],edx
+ ;
+ add edi,32
+ add esi,64
+ sub ebx,8
+ jnz NEAR L$018sw_loop
+L$017sw_finish:
+ mov ebx,DWORD [28+esp]
+ and ebx,7
+ jz NEAR L$019sw_end
+ ; Tail Round 0
+ mov eax,DWORD [edi]
+ mul eax
+ mov DWORD [esi],eax
+ dec ebx
+ mov DWORD [4+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 1
+ mov eax,DWORD [4+edi]
+ mul eax
+ mov DWORD [8+esi],eax
+ dec ebx
+ mov DWORD [12+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 2
+ mov eax,DWORD [8+edi]
+ mul eax
+ mov DWORD [16+esi],eax
+ dec ebx
+ mov DWORD [20+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 3
+ mov eax,DWORD [12+edi]
+ mul eax
+ mov DWORD [24+esi],eax
+ dec ebx
+ mov DWORD [28+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 4
+ mov eax,DWORD [16+edi]
+ mul eax
+ mov DWORD [32+esi],eax
+ dec ebx
+ mov DWORD [36+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 5
+ mov eax,DWORD [20+edi]
+ mul eax
+ mov DWORD [40+esi],eax
+ dec ebx
+ mov DWORD [44+esi],edx
+ jz NEAR L$019sw_end
+ ; Tail Round 6
+ mov eax,DWORD [24+edi]
+ mul eax
+ mov DWORD [48+esi],eax
+ mov DWORD [52+esi],edx
+L$019sw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_div_words
+align 16
+_bn_div_words:
+L$_bn_div_words_begin:
+ mov edx,DWORD [4+esp]
+ mov eax,DWORD [8+esp]
+ mov ecx,DWORD [12+esp]
+ div ecx
+ ret
+global _bn_add_words
+align 16
+_bn_add_words:
+L$_bn_add_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$020aw_finish
+L$021aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$021aw_loop
+L$020aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$022aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$022aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ add ecx,eax
+ mov eax,0
+ adc eax,eax
+ add ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$022aw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_words
+align 16
+_bn_sub_words:
+L$_bn_sub_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$023aw_finish
+L$024aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$024aw_loop
+L$023aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$025aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$025aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+L$025aw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+global _bn_sub_part_words
+align 16
+_bn_sub_part_words:
+L$_bn_sub_part_words_begin:
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx,DWORD [20+esp]
+ mov esi,DWORD [24+esp]
+ mov edi,DWORD [28+esp]
+ mov ebp,DWORD [32+esp]
+ xor eax,eax
+ and ebp,4294967288
+ jz NEAR L$026aw_finish
+L$027aw_loop:
+ ; Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; Round 1
+ mov ecx,DWORD [4+esi]
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; Round 2
+ mov ecx,DWORD [8+esi]
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; Round 3
+ mov ecx,DWORD [12+esi]
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; Round 4
+ mov ecx,DWORD [16+esi]
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; Round 5
+ mov ecx,DWORD [20+esi]
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; Round 6
+ mov ecx,DWORD [24+esi]
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; Round 7
+ mov ecx,DWORD [28+esi]
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add esi,32
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$027aw_loop
+L$026aw_finish:
+ mov ebp,DWORD [32+esp]
+ and ebp,7
+ jz NEAR L$028aw_end
+ ; Tail Round 0
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 1
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 2
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 3
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 4
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 5
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+ dec ebp
+ jz NEAR L$028aw_end
+ ; Tail Round 6
+ mov ecx,DWORD [esi]
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ add esi,4
+ add edi,4
+ add ebx,4
+L$028aw_end:
+ cmp DWORD [36+esp],0
+ je NEAR L$029pw_end
+ mov ebp,DWORD [36+esp]
+ cmp ebp,0
+ je NEAR L$029pw_end
+ jge NEAR L$030pw_pos
+ ; pw_neg
+ mov edx,0
+ sub edx,ebp
+ mov ebp,edx
+ and ebp,4294967288
+ jz NEAR L$031pw_neg_finish
+L$032pw_neg_loop:
+ ; dl<0 Round 0
+ mov ecx,0
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [ebx],ecx
+ ; dl<0 Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [4+ebx],ecx
+ ; dl<0 Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [8+ebx],ecx
+ ; dl<0 Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [12+ebx],ecx
+ ; dl<0 Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [16+ebx],ecx
+ ; dl<0 Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [20+ebx],ecx
+ ; dl<0 Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ ; dl<0 Round 7
+ mov ecx,0
+ mov edx,DWORD [28+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [28+ebx],ecx
+ ;
+ add edi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$032pw_neg_loop
+L$031pw_neg_finish:
+ mov edx,DWORD [36+esp]
+ mov ebp,0
+ sub ebp,edx
+ and ebp,7
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 0
+ mov ecx,0
+ mov edx,DWORD [edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 1
+ mov ecx,0
+ mov edx,DWORD [4+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [4+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 2
+ mov ecx,0
+ mov edx,DWORD [8+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [8+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 3
+ mov ecx,0
+ mov edx,DWORD [12+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [12+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 4
+ mov ecx,0
+ mov edx,DWORD [16+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [16+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 5
+ mov ecx,0
+ mov edx,DWORD [20+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ dec ebp
+ mov DWORD [20+ebx],ecx
+ jz NEAR L$029pw_end
+ ; dl<0 Tail Round 6
+ mov ecx,0
+ mov edx,DWORD [24+edi]
+ sub ecx,eax
+ mov eax,0
+ adc eax,eax
+ sub ecx,edx
+ adc eax,0
+ mov DWORD [24+ebx],ecx
+ jmp NEAR L$029pw_end
+L$030pw_pos:
+ and ebp,4294967288
+ jz NEAR L$033pw_pos_finish
+L$034pw_pos_loop:
+ ; dl>0 Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax
+ mov DWORD [ebx],ecx
+ jnc NEAR L$035pw_nc0
+ ; dl>0 Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$036pw_nc1
+ ; dl>0 Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$037pw_nc2
+ ; dl>0 Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$038pw_nc3
+ ; dl>0 Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$039pw_nc4
+ ; dl>0 Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$040pw_nc5
+ ; dl>0 Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$041pw_nc6
+ ; dl>0 Round 7
+ mov ecx,DWORD [28+esi]
+ sub ecx,eax
+ mov DWORD [28+ebx],ecx
+ jnc NEAR L$042pw_nc7
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$034pw_pos_loop
+L$033pw_pos_finish:
+ mov ebp,DWORD [36+esp]
+ and ebp,7
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 0
+ mov ecx,DWORD [esi]
+ sub ecx,eax
+ mov DWORD [ebx],ecx
+ jnc NEAR L$043pw_tail_nc0
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 1
+ mov ecx,DWORD [4+esi]
+ sub ecx,eax
+ mov DWORD [4+ebx],ecx
+ jnc NEAR L$044pw_tail_nc1
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 2
+ mov ecx,DWORD [8+esi]
+ sub ecx,eax
+ mov DWORD [8+ebx],ecx
+ jnc NEAR L$045pw_tail_nc2
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 3
+ mov ecx,DWORD [12+esi]
+ sub ecx,eax
+ mov DWORD [12+ebx],ecx
+ jnc NEAR L$046pw_tail_nc3
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 4
+ mov ecx,DWORD [16+esi]
+ sub ecx,eax
+ mov DWORD [16+ebx],ecx
+ jnc NEAR L$047pw_tail_nc4
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 5
+ mov ecx,DWORD [20+esi]
+ sub ecx,eax
+ mov DWORD [20+ebx],ecx
+ jnc NEAR L$048pw_tail_nc5
+ dec ebp
+ jz NEAR L$029pw_end
+ ; dl>0 Tail Round 6
+ mov ecx,DWORD [24+esi]
+ sub ecx,eax
+ mov DWORD [24+ebx],ecx
+ jnc NEAR L$049pw_tail_nc6
+ mov eax,1
+ jmp NEAR L$029pw_end
+L$050pw_nc_loop:
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$035pw_nc0:
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$036pw_nc1:
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$037pw_nc2:
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$038pw_nc3:
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$039pw_nc4:
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$040pw_nc5:
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$041pw_nc6:
+ mov ecx,DWORD [28+esi]
+ mov DWORD [28+ebx],ecx
+L$042pw_nc7:
+ ;
+ add esi,32
+ add ebx,32
+ sub ebp,8
+ jnz NEAR L$050pw_nc_loop
+ mov ebp,DWORD [36+esp]
+ and ebp,7
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [esi]
+ mov DWORD [ebx],ecx
+L$043pw_tail_nc0:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [4+esi]
+ mov DWORD [4+ebx],ecx
+L$044pw_tail_nc1:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [8+esi]
+ mov DWORD [8+ebx],ecx
+L$045pw_tail_nc2:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [12+esi]
+ mov DWORD [12+ebx],ecx
+L$046pw_tail_nc3:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [16+esi]
+ mov DWORD [16+ebx],ecx
+L$047pw_tail_nc4:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [20+esi]
+ mov DWORD [20+ebx],ecx
+L$048pw_tail_nc5:
+ dec ebp
+ jz NEAR L$051pw_nc_end
+ mov ecx,DWORD [24+esi]
+ mov DWORD [24+ebx],ecx
+L$049pw_tail_nc6:
+L$051pw_nc_end:
+ mov eax,0
+L$029pw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+segment .bss
+common _OPENSSL_ia32cap_P 16