diff options
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn')
4 files changed, 3591 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm new file mode 100644 index 0000000000..82002b353b --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/bn-586.asm @@ -0,0 +1,1515 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_add_words +align 16 +_bn_mul_add_words: +L$_bn_mul_add_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$000maw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 + jmp NEAR L$001maw_sse2_entry +align 16 +L$002maw_sse2_unrolled: + movd mm3,DWORD [eax] + paddq mm1,mm3 + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + movd mm4,DWORD [4+edx] + pmuludq mm4,mm0 + movd mm6,DWORD [8+edx] + pmuludq mm6,mm0 + movd mm7,DWORD [12+edx] + pmuludq mm7,mm0 + paddq mm1,mm2 + movd mm3,DWORD [4+eax] + paddq mm3,mm4 + movd mm5,DWORD [8+eax] + paddq mm5,mm6 + movd mm4,DWORD [12+eax] + paddq mm7,mm4 + movd DWORD [eax],mm1 + movd mm2,DWORD [16+edx] + pmuludq mm2,mm0 + psrlq mm1,32 + movd mm4,DWORD [20+edx] + pmuludq mm4,mm0 + paddq mm1,mm3 + movd mm6,DWORD [24+edx] + pmuludq mm6,mm0 + movd DWORD [4+eax],mm1 + psrlq mm1,32 + movd mm3,DWORD [28+edx] + add edx,32 + pmuludq mm3,mm0 + paddq mm1,mm5 + movd mm5,DWORD [16+eax] + paddq mm2,mm5 + movd DWORD [8+eax],mm1 + psrlq mm1,32 + paddq mm1,mm7 + movd mm5,DWORD [20+eax] + paddq mm4,mm5 + movd DWORD [12+eax],mm1 + psrlq mm1,32 + paddq mm1,mm2 + movd mm5,DWORD [24+eax] + paddq mm6,mm5 + movd DWORD [16+eax],mm1 + psrlq mm1,32 + paddq mm1,mm4 + movd mm5,DWORD [28+eax] + paddq mm3,mm5 + movd DWORD [20+eax],mm1 + psrlq mm1,32 + paddq mm1,mm6 + movd DWORD [24+eax],mm1 + psrlq mm1,32 + paddq mm1,mm3 + movd DWORD [28+eax],mm1 + lea 
eax,[32+eax] + psrlq mm1,32 + sub ecx,8 + jz NEAR L$003maw_sse2_exit +L$001maw_sse2_entry: + test ecx,4294967288 + jnz NEAR L$002maw_sse2_unrolled +align 4 +L$004maw_sse2_loop: + movd mm2,DWORD [edx] + movd mm3,DWORD [eax] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm3 + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$004maw_sse2_loop +L$003maw_sse2_exit: + movd eax,mm1 + emms + ret +align 16 +L$000maw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ecx,DWORD [28+esp] + mov ebx,DWORD [24+esp] + and ecx,4294967288 + mov ebp,DWORD [32+esp] + push ecx + jz NEAR L$005maw_finish +align 16 +L$006maw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [28+edi] + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + sub ecx,8 + lea ebx,[32+ebx] + lea edi,[32+edi] + jnz NEAR L$006maw_loop +L$005maw_finish: + mov 
ecx,DWORD [32+esp] + and ecx,7 + jnz NEAR L$007maw_finish2 + jmp NEAR L$008maw_end +L$007maw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + dec ecx + mov DWORD [edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + dec ecx + mov DWORD [4+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + dec ecx + mov DWORD [8+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + dec ecx + mov DWORD [12+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + dec ecx + mov DWORD [16+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + dec ecx + mov DWORD [20+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$008maw_end: + mov eax,esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_mul_words +align 16 +_bn_mul_words: +L$_bn_mul_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$009mw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 +align 16 +L$010mw_sse2_loop: + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$010mw_sse2_loop + movd eax,mm1 + emms + ret +align 16 +L$009mw_non_sse2: + push ebp + push ebx + 
push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ebp,DWORD [28+esp] + mov ecx,DWORD [32+esp] + and ebp,4294967288 + jz NEAR L$011mw_finish +L$012mw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + add ebx,32 + add edi,32 + sub ebp,8 + jz NEAR L$011mw_finish + jmp NEAR L$012mw_loop +L$011mw_finish: + mov ebp,DWORD [28+esp] + and ebp,7 + jnz NEAR L$013mw_finish2 + jmp NEAR L$014mw_end +L$013mw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov 
esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$014mw_end: + mov eax,esi + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sqr_words +align 16 +_bn_sqr_words: +L$_bn_sqr_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$015sqr_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] +align 16 +L$016sqr_sse2_loop: + movd mm0,DWORD [edx] + pmuludq mm0,mm0 + lea edx,[4+edx] + movq [eax],mm0 + sub ecx,1 + lea eax,[8+eax] + jnz NEAR L$016sqr_sse2_loop + emms + ret +align 16 +L$015sqr_non_sse2: + push ebp + push ebx + push esi + push edi + ; + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov ebx,DWORD [28+esp] + and ebx,4294967288 + jz NEAR L$017sw_finish +L$018sw_loop: + ; Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + mov DWORD [4+esi],edx + ; Round 4 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + mov DWORD [12+esi],edx + ; Round 8 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + mov DWORD [20+esi],edx + ; Round 12 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + mov DWORD [28+esi],edx + ; Round 16 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + mov DWORD [36+esi],edx + ; Round 20 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + mov DWORD [44+esi],edx + ; Round 24 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx + ; Round 28 + mov eax,DWORD [28+edi] + mul eax + mov DWORD [56+esi],eax + mov DWORD [60+esi],edx + ; + add edi,32 + add esi,64 + sub ebx,8 + jnz NEAR L$018sw_loop +L$017sw_finish: + mov ebx,DWORD [28+esp] + and ebx,7 + jz NEAR L$019sw_end + ; Tail Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + dec ebx + mov 
DWORD [4+esi],edx + jz NEAR L$019sw_end + ; Tail Round 1 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + dec ebx + mov DWORD [12+esi],edx + jz NEAR L$019sw_end + ; Tail Round 2 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + dec ebx + mov DWORD [20+esi],edx + jz NEAR L$019sw_end + ; Tail Round 3 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + dec ebx + mov DWORD [28+esi],edx + jz NEAR L$019sw_end + ; Tail Round 4 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + dec ebx + mov DWORD [36+esi],edx + jz NEAR L$019sw_end + ; Tail Round 5 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + dec ebx + mov DWORD [44+esi],edx + jz NEAR L$019sw_end + ; Tail Round 6 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx +L$019sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret + ; _bn_div_words: divides the 64-bit value edx:eax by ecx with a single DIV. + ; edx, eax and ecx are loaded from the dwords at offsets 4, 8 and 12 from esp at entry. + ; At the final ret eax holds the quotient and edx the remainder; no registers are saved or restored. +global _bn_div_words +align 16 +_bn_div_words: +L$_bn_div_words_begin: + mov edx,DWORD [4+esp] + mov eax,DWORD [8+esp] + mov ecx,DWORD [12+esp] + ; DIV faults with a divide error when ecx is zero or when the quotient does not fit in 32 bits (edx not below ecx); no check is made here. + div ecx + ret +global _bn_add_words +align 16 +_bn_add_words: +L$_bn_add_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$020aw_finish +L$021aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax +
mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$021aw_loop +L$020aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$022aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$022aw_end: + pop edi + 
pop esi + pop ebx + pop ebp + ret +global _bn_sub_words +align 16 +_bn_sub_words: +L$_bn_sub_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$023aw_finish +L$024aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$024aw_loop +L$023aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$025aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc 
eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$025aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sub_part_words +align 16 +_bn_sub_part_words: +L$_bn_sub_part_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$026aw_finish +L$027aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD 
[16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$027aw_loop +L$026aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$028aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 1 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 2 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 3 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 4 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 5 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz 
NEAR L$028aw_end + ; Tail Round 6 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 +L$028aw_end: + cmp DWORD [36+esp],0 + je NEAR L$029pw_end + mov ebp,DWORD [36+esp] + cmp ebp,0 + je NEAR L$029pw_end + jge NEAR L$030pw_pos + ; pw_neg + mov edx,0 + sub edx,ebp + mov ebp,edx + and ebp,4294967288 + jz NEAR L$031pw_neg_finish +L$032pw_neg_loop: + ; dl<0 Round 0 + mov ecx,0 + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; dl<0 Round 1 + mov ecx,0 + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; dl<0 Round 2 + mov ecx,0 + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; dl<0 Round 3 + mov ecx,0 + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; dl<0 Round 4 + mov ecx,0 + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; dl<0 Round 5 + mov ecx,0 + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; dl<0 Round 6 + mov ecx,0 + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; dl<0 Round 7 + mov ecx,0 + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$032pw_neg_loop +L$031pw_neg_finish: + mov edx,DWORD [36+esp] + mov ebp,0 + sub ebp,edx + and ebp,7 + jz NEAR L$029pw_end + ; dl<0 Tail Round 0 + mov ecx,0 + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 1 + mov ecx,0 
+ mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 2 + mov ecx,0 + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 3 + mov ecx,0 + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 4 + mov ecx,0 + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 5 + mov ecx,0 + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 6 + mov ecx,0 + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + jmp NEAR L$029pw_end +L$030pw_pos: + and ebp,4294967288 + jz NEAR L$033pw_pos_finish +L$034pw_pos_loop: + ; dl>0 Round 0 + mov ecx,DWORD [esi] + sub ecx,eax + mov DWORD [ebx],ecx + jnc NEAR L$035pw_nc0 + ; dl>0 Round 1 + mov ecx,DWORD [4+esi] + sub ecx,eax + mov DWORD [4+ebx],ecx + jnc NEAR L$036pw_nc1 + ; dl>0 Round 2 + mov ecx,DWORD [8+esi] + sub ecx,eax + mov DWORD [8+ebx],ecx + jnc NEAR L$037pw_nc2 + ; dl>0 Round 3 + mov ecx,DWORD [12+esi] + sub ecx,eax + mov DWORD [12+ebx],ecx + jnc NEAR L$038pw_nc3 + ; dl>0 Round 4 + mov ecx,DWORD [16+esi] + sub ecx,eax + mov DWORD [16+ebx],ecx + jnc NEAR L$039pw_nc4 + ; dl>0 Round 5 + mov ecx,DWORD [20+esi] + sub ecx,eax + mov DWORD [20+ebx],ecx + jnc NEAR L$040pw_nc5 + ; dl>0 Round 6 + mov ecx,DWORD [24+esi] + sub ecx,eax + mov DWORD [24+ebx],ecx + jnc NEAR L$041pw_nc6 + ; dl>0 Round 7 + mov ecx,DWORD [28+esi] + sub ecx,eax + mov DWORD [28+ebx],ecx + jnc NEAR L$042pw_nc7 + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz 
NEAR L$034pw_pos_loop +L$033pw_pos_finish: + mov ebp,DWORD [36+esp] + and ebp,7 + jz NEAR L$029pw_end + ; dl>0 Tail Round 0 + mov ecx,DWORD [esi] + sub ecx,eax + mov DWORD [ebx],ecx + jnc NEAR L$043pw_tail_nc0 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 1 + mov ecx,DWORD [4+esi] + sub ecx,eax + mov DWORD [4+ebx],ecx + jnc NEAR L$044pw_tail_nc1 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 2 + mov ecx,DWORD [8+esi] + sub ecx,eax + mov DWORD [8+ebx],ecx + jnc NEAR L$045pw_tail_nc2 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 3 + mov ecx,DWORD [12+esi] + sub ecx,eax + mov DWORD [12+ebx],ecx + jnc NEAR L$046pw_tail_nc3 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 4 + mov ecx,DWORD [16+esi] + sub ecx,eax + mov DWORD [16+ebx],ecx + jnc NEAR L$047pw_tail_nc4 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 5 + mov ecx,DWORD [20+esi] + sub ecx,eax + mov DWORD [20+ebx],ecx + jnc NEAR L$048pw_tail_nc5 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 6 + mov ecx,DWORD [24+esi] + sub ecx,eax + mov DWORD [24+ebx],ecx + jnc NEAR L$049pw_tail_nc6 + mov eax,1 + jmp NEAR L$029pw_end +L$050pw_nc_loop: + mov ecx,DWORD [esi] + mov DWORD [ebx],ecx +L$035pw_nc0: + mov ecx,DWORD [4+esi] + mov DWORD [4+ebx],ecx +L$036pw_nc1: + mov ecx,DWORD [8+esi] + mov DWORD [8+ebx],ecx +L$037pw_nc2: + mov ecx,DWORD [12+esi] + mov DWORD [12+ebx],ecx +L$038pw_nc3: + mov ecx,DWORD [16+esi] + mov DWORD [16+ebx],ecx +L$039pw_nc4: + mov ecx,DWORD [20+esi] + mov DWORD [20+ebx],ecx +L$040pw_nc5: + mov ecx,DWORD [24+esi] + mov DWORD [24+ebx],ecx +L$041pw_nc6: + mov ecx,DWORD [28+esi] + mov DWORD [28+ebx],ecx +L$042pw_nc7: + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$050pw_nc_loop + mov ebp,DWORD [36+esp] + and ebp,7 + jz NEAR L$051pw_nc_end + mov ecx,DWORD [esi] + mov DWORD [ebx],ecx +L$043pw_tail_nc0: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [4+esi] + mov DWORD [4+ebx],ecx +L$044pw_tail_nc1: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [8+esi] + mov 
DWORD [8+ebx],ecx +L$045pw_tail_nc2: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [12+esi] + mov DWORD [12+ebx],ecx +L$046pw_tail_nc3: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [16+esi] + mov DWORD [16+ebx],ecx +L$047pw_tail_nc4: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [20+esi] + mov DWORD [20+ebx],ecx +L$048pw_tail_nc5: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [24+esi] + mov DWORD [24+ebx],ecx +L$049pw_tail_nc6: +L$051pw_nc_end: + mov eax,0 +L$029pw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm new file mode 100644 index 0000000000..d57f0b5ffe --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/co-586.asm @@ -0,0 +1,1252 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +global _bn_mul_comba8 +align 16 +_bn_mul_comba8: +L$_bn_mul_comba8_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD 
[4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [16+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[4]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[0]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[5]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[4]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx 
+ mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[1]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[0]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [24+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[6]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[5]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[4]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[2]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[1]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[0]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [24+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ################## Calculate word 7 + xor ebx,ebx + ; mul a[7]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[6]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[5]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[4]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[3]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov 
edx,DWORD [20+edi] + adc ebx,0 + ; mul a[2]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[1]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[0]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + mov DWORD [28+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[7] + ; ################## Calculate word 8 + xor ecx,ecx + ; mul a[7]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[6]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[5]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[4]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[3]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[2]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[1]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + mov DWORD [32+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ################## Calculate word 9 + xor ebp,ebp + ; mul a[7]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[6]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[5]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[4]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[3]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov 
edx,DWORD [28+edi] + adc ebp,0 + ; mul a[2]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + mov DWORD [36+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[9] + ; ################## Calculate word 10 + xor ebx,ebx + ; mul a[7]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[6]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [20+edi] + adc ebx,0 + ; mul a[5]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[4]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[3]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + mov DWORD [40+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ################## Calculate word 11 + xor ecx,ecx + ; mul a[7]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[6]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[5]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[4]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + mov DWORD [44+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[11] + ; ################## Calculate word 12 + xor ebp,ebp + ; mul a[7]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[6]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [28+edi] + adc ebp,0 + ; mul a[5]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + mov DWORD [48+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; 
################## Calculate word 13 + xor ebx,ebx + ; mul a[7]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[6]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + mov DWORD [52+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[13] + ; ################## Calculate word 14 + xor ecx,ecx + ; mul a[7]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + adc ecx,0 + mov DWORD [56+eax],ebp + ; saved r[14] + ; save r[15] + mov DWORD [60+eax],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_mul_comba4 +align 16 +_bn_mul_comba4: +L$_bn_mul_comba4_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add 
ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [12+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + adc ebp,0 + mov DWORD [24+eax],ebx + ; saved r[6] + ; save r[7] + mov DWORD [28+eax],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba8 +align 16 +_bn_sqr_comba8: +L$_bn_sqr_comba8_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; 
saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[4]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [12+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[5]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + mov edx,DWORD [4+esi] + ; sqr a[4]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov DWORD 
[20+edi],ebp + mov edx,DWORD [esi] + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[6]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[5]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov edx,DWORD [8+esi] + ; sqr a[4]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [24+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ############### Calculate word 7 + xor ebx,ebx + ; sqr a[7]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[6]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + mov edx,DWORD [8+esi] + ; sqr a[5]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [16+esi] + adc ebx,0 + mov edx,DWORD [12+esi] + ; sqr a[4]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [28+edi],ecx + mov edx,DWORD [4+esi] + ; saved r[7] + ; ############### Calculate word 8 + xor ecx,ecx + ; sqr a[7]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[6]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [20+esi] + adc ecx,0 + mov edx,DWORD [12+esi] + ; sqr a[5]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + ; sqr a[4]*a[4] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD 
[8+esi] + adc ecx,0 + mov DWORD [32+edi],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ############### Calculate word 9 + xor ebp,ebp + ; sqr a[7]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + mov edx,DWORD [12+esi] + ; sqr a[6]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [16+esi] + ; sqr a[5]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [28+esi] + adc ebp,0 + mov DWORD [36+edi],ebx + mov edx,DWORD [12+esi] + ; saved r[9] + ; ############### Calculate word 10 + xor ebx,ebx + ; sqr a[7]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [16+esi] + ; sqr a[6]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + ; sqr a[5]*a[5] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [16+esi] + adc ebx,0 + mov DWORD [40+edi],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ############### Calculate word 11 + xor ecx,ecx + ; sqr a[7]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [20+esi] + ; sqr a[6]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [28+esi] + adc ecx,0 + mov DWORD [44+edi],ebp + mov edx,DWORD [20+esi] + ; saved r[11] + ; ############### Calculate word 12 + xor ebp,ebp + ; sqr a[7]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + ; sqr a[6]*a[6] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [24+esi] + adc ebp,0 + mov DWORD [48+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; ############### Calculate word 13 + xor ebx,ebx + ; sqr a[7]*a[6] + mul edx + add eax,eax 
+ adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [52+edi],ecx + ; saved r[13] + ; ############### Calculate word 14 + xor ecx,ecx + ; sqr a[7]*a[7] + mul eax + add ebp,eax + adc ebx,edx + adc ecx,0 + mov DWORD [56+edi],ebp + ; saved r[14] + mov DWORD [60+edi],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba4 +align 16 +_bn_sqr_comba4: +L$_bn_sqr_comba4_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [4+esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + 
adc ebp,edx + mov edx,DWORD [8+esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov DWORD [20+edi],ebp + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + adc ebp,0 + mov DWORD [24+edi],ebx + ; saved r[6] + mov DWORD [28+edi],ecx + pop ebx + pop ebp + pop edi + pop esi + ret diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm new file mode 100644 index 0000000000..709f4a9e50 --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm @@ -0,0 +1,345 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +align 16 +__mul_1x1_mmx: + sub esp,36 + mov ecx,eax + lea edx,[eax*1+eax] + and ecx,1073741823 + lea ebp,[edx*1+edx] + mov DWORD [esp],0 + and edx,2147483647 + movd mm2,eax + movd mm3,ebx + mov DWORD [4+esp],ecx + xor ecx,edx + pxor mm5,mm5 + pxor mm4,mm4 + mov DWORD [8+esp],edx + xor edx,ebp + mov DWORD [12+esp],ecx + pcmpgtd mm5,mm2 + paddd mm2,mm2 + xor ecx,edx + mov DWORD [16+esp],ebp + xor ebp,edx + pand mm5,mm3 + pcmpgtd mm4,mm2 + mov DWORD [20+esp],ecx + xor ebp,ecx + psllq mm5,31 + pand mm4,mm3 + mov DWORD [24+esp],edx + mov esi,7 + mov DWORD [28+esp],ebp + mov ebp,esi + and esi,ebx + shr ebx,3 + mov edi,ebp + psllq mm4,30 + and edi,ebx + shr ebx,3 + movd mm0,DWORD [esi*4+esp] + mov esi,ebp + and esi,ebx + shr ebx,3 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,3 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,6 + and esi,ebx + shr ebx,3 
+ pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,9 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,12 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,15 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,18 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,21 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,24 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + pxor mm0,mm4 + psllq mm2,27 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + pxor mm0,mm5 + psllq mm1,30 + add esp,36 + pxor mm0,mm1 + ret +align 16 +__mul_1x1_ialu: + sub esp,36 + mov ecx,eax + lea edx,[eax*1+eax] + lea ebp,[eax*4] + and ecx,1073741823 + lea edi,[eax*1+eax] + sar eax,31 + mov DWORD [esp],0 + and edx,2147483647 + mov DWORD [4+esp],ecx + xor ecx,edx + mov DWORD [8+esp],edx + xor edx,ebp + mov DWORD [12+esp],ecx + xor ecx,edx + mov DWORD [16+esp],ebp + xor ebp,edx + mov DWORD [20+esp],ecx + xor ebp,ecx + sar edi,31 + and eax,ebx + mov DWORD [24+esp],edx + and edi,ebx + mov DWORD [28+esp],ebp + mov edx,eax + shl eax,31 + mov ecx,edi + shr edx,1 + mov esi,7 + shl edi,30 + and esi,ebx + shr ecx,2 + xor eax,edi + shr ebx,3 + mov edi,7 + and edi,ebx + shr ebx,3 + xor edx,ecx + xor eax,DWORD [esi*4+esp] + mov esi,7 + and esi,ebx + shr ebx,3 + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,3 + and edi,ebx + shr ecx,29 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,6 + and esi,ebx + shr ebp,26 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,9 + and edi,ebx + shr ecx,23 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,12 + and esi,ebx + shr ebp,20 + xor 
eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,15 + and edi,ebx + shr ecx,17 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,18 + and esi,ebx + shr ebp,14 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,21 + and edi,ebx + shr ecx,11 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,24 + and esi,ebx + shr ebp,8 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov ecx,ebp + shl ebp,27 + mov edi,DWORD [esi*4+esp] + shr ecx,5 + mov esi,edi + xor eax,ebp + shl edi,30 + xor edx,ecx + shr esi,2 + xor eax,edi + xor edx,esi + add esp,36 + ret +global _bn_GF2m_mul_2x2 +align 16 +_bn_GF2m_mul_2x2: +L$_bn_GF2m_mul_2x2_begin: + lea edx,[_OPENSSL_ia32cap_P] + mov eax,DWORD [edx] + mov edx,DWORD [4+edx] + test eax,8388608 + jz NEAR L$000ialu + test eax,16777216 + jz NEAR L$001mmx + test edx,2 + jz NEAR L$001mmx + movups xmm0,[8+esp] + shufps xmm0,xmm0,177 +db 102,15,58,68,192,1 + mov eax,DWORD [4+esp] + movups [eax],xmm0 + ret +align 16 +L$001mmx: + push ebp + push ebx + push esi + push edi + mov eax,DWORD [24+esp] + mov ebx,DWORD [32+esp] + call __mul_1x1_mmx + movq mm7,mm0 + mov eax,DWORD [28+esp] + mov ebx,DWORD [36+esp] + call __mul_1x1_mmx + movq mm6,mm0 + mov eax,DWORD [24+esp] + mov ebx,DWORD [32+esp] + xor eax,DWORD [28+esp] + xor ebx,DWORD [36+esp] + call __mul_1x1_mmx + pxor mm0,mm7 + mov eax,DWORD [20+esp] + pxor mm0,mm6 + movq mm2,mm0 + psllq mm0,32 + pop edi + psrlq mm2,32 + pop esi + pxor mm0,mm6 + pop ebx + pxor mm2,mm7 + movq [eax],mm0 + pop ebp + movq [8+eax],mm2 + emms + ret +align 16 +L$000ialu: + push ebp + push ebx + push esi + push edi + sub esp,20 + mov eax,DWORD [44+esp] + mov ebx,DWORD [52+esp] + call __mul_1x1_ialu + mov DWORD [8+esp],eax + mov DWORD [12+esp],edx + mov eax,DWORD [48+esp] + mov ebx,DWORD [56+esp] + call 
__mul_1x1_ialu + mov DWORD [esp],eax + mov DWORD [4+esp],edx + mov eax,DWORD [44+esp] + mov ebx,DWORD [52+esp] + xor eax,DWORD [48+esp] + xor ebx,DWORD [56+esp] + call __mul_1x1_ialu + mov ebp,DWORD [40+esp] + mov ebx,DWORD [esp] + mov ecx,DWORD [4+esp] + mov edi,DWORD [8+esp] + mov esi,DWORD [12+esp] + xor eax,edx + xor edx,ecx + xor eax,ebx + mov DWORD [ebp],ebx + xor edx,edi + mov DWORD [12+ebp],esi + xor eax,esi + add esp,20 + xor edx,esi + pop edi + xor eax,edx + pop esi + mov DWORD [8+ebp],edx + pop ebx + mov DWORD [4+ebp],eax + pop ebp + ret +db 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +db 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm new file mode 100644 index 0000000000..090630c3a0 --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-mont.asm @@ -0,0 +1,479 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_mont +align 16 +_bn_mul_mont: +L$_bn_mul_mont_begin: + push ebp + push ebx + push esi + push edi + xor eax,eax + mov edi,DWORD [40+esp] + cmp edi,4 + jl NEAR L$000just_leave + lea esi,[20+esp] + lea edx,[24+esp] + add edi,2 + neg edi + lea ebp,[edi*4+esp-32] + neg edi + mov eax,ebp + sub eax,edx + and eax,2047 + sub ebp,eax + xor edx,ebp + and edx,2048 + xor edx,2048 + sub ebp,edx + and ebp,-64 + mov eax,esp + sub eax,ebp + and eax,-4096 + mov edx,esp + lea esp,[eax*1+ebp] + mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk + jmp NEAR L$002page_walk_done +align 16 +L$001page_walk: + lea esp,[esp-4096] + 
mov eax,DWORD [esp] + cmp esp,ebp + ja NEAR L$001page_walk +L$002page_walk_done: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov ebp,DWORD [12+esi] + mov esi,DWORD [16+esi] + mov esi,DWORD [esi] + mov DWORD [4+esp],eax + mov DWORD [8+esp],ebx + mov DWORD [12+esp],ecx + mov DWORD [16+esp],ebp + mov DWORD [20+esp],esi + lea ebx,[edi-3] + mov DWORD [24+esp],edx + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$003non_sse2 + mov eax,-1 + movd mm7,eax + mov esi,DWORD [8+esp] + mov edi,DWORD [12+esp] + mov ebp,DWORD [16+esp] + xor edx,edx + xor ecx,ecx + movd mm4,DWORD [edi] + movd mm5,DWORD [esi] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + movq mm2,mm5 + movq mm0,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + inc ecx +align 16 +L$0041st: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + lea ecx,[1+ecx] + cmp ecx,ebx + jl NEAR L$0041st + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + paddq mm3,mm2 + movq [32+ebx*4+esp],mm3 + inc edx +L$005outer: + xor ecx,ecx + movd mm4,DWORD [edx*4+edi] + movd mm5,DWORD [esi] + movd mm6,DWORD [32+esp] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + paddq mm5,mm6 + movq mm0,mm5 + movq mm2,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm6,DWORD [36+esp] + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + paddq mm2,mm6 + inc ecx + dec ebx +L$006inner: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + movd mm6,DWORD [36+ecx*4+esp] + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq 
mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + paddq mm2,mm6 + dec ebx + lea ecx,[1+ecx] + jnz NEAR L$006inner + mov ebx,ecx + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + movd mm6,DWORD [36+ebx*4+esp] + paddq mm3,mm2 + paddq mm3,mm6 + movq [32+ebx*4+esp],mm3 + lea edx,[1+edx] + cmp edx,ebx + jle NEAR L$005outer + emms + jmp NEAR L$007common_tail +align 16 +L$003non_sse2: + mov esi,DWORD [8+esp] + lea ebp,[1+ebx] + mov edi,DWORD [12+esp] + xor ecx,ecx + mov edx,esi + and ebp,1 + sub edx,edi + lea eax,[4+ebx*4+edi] + or ebp,edx + mov edi,DWORD [edi] + jz NEAR L$008bn_sqr_mont + mov DWORD [28+esp],eax + mov eax,DWORD [esi] + xor edx,edx +align 16 +L$009mull: + mov ebp,edx + mul edi + add ebp,eax + lea ecx,[1+ecx] + adc edx,0 + mov eax,DWORD [ecx*4+esi] + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$009mull + mov ebp,edx + mul edi + mov edi,DWORD [20+esp] + add eax,ebp + mov esi,DWORD [16+esp] + adc edx,0 + imul edi,DWORD [32+esp] + mov DWORD [32+ebx*4+esp],eax + xor ecx,ecx + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mov eax,DWORD [esi] + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD [4+esi] + adc edx,0 + inc ecx + jmp NEAR L$0102ndmadd +align 16 +L$0111stmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$0111stmadd + mov ebp,edx + mul edi + add eax,DWORD [32+ebx*4+esp] + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + add ebp,eax + adc edx,0 + imul edi,DWORD [32+esp] + xor ecx,ecx + add edx,DWORD [36+ebx*4+esp] + mov DWORD [32+ebx*4+esp],ebp + adc ecx,0 + mov eax,DWORD [esi] + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD 
[4+esi] + adc edx,0 + mov ecx,1 +align 16 +L$0102ndmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0102ndmadd + mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + xor eax,eax + mov ecx,DWORD [12+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + lea ecx,[4+ecx] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,DWORD [28+esp] + mov DWORD [36+ebx*4+esp],eax + je NEAR L$007common_tail + mov edi,DWORD [ecx] + mov esi,DWORD [8+esp] + mov DWORD [12+esp],ecx + xor ecx,ecx + xor edx,edx + mov eax,DWORD [esi] + jmp NEAR L$0111stmadd +align 16 +L$008bn_sqr_mont: + mov DWORD [esp],ebx + mov DWORD [12+esp],ecx + mov eax,edi + mul edi + mov DWORD [32+esp],eax + mov ebx,edx + shr edx,1 + and ebx,1 + inc ecx +align 16 +L$012sqr: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ecx,[1+ecx] + adc edx,0 + lea ebp,[eax*2+ebx] + shr eax,31 + cmp ecx,DWORD [esp] + mov ebx,eax + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$012sqr + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + lea ebp,[eax*2+ebx] + imul edi,DWORD [32+esp] + shr eax,31 + mov DWORD [32+ecx*4+esp],ebp + lea ebp,[edx*2+eax] + mov eax,DWORD [esi] + shr edx,31 + mov DWORD [36+ecx*4+esp],ebp + mov DWORD [40+ecx*4+esp],edx + mul edi + add eax,DWORD [32+esp] + mov ebx,ecx + adc edx,0 + mov eax,DWORD [4+esi] + mov ecx,1 +align 16 +L$0133rdmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + adc edx,0 + add ebp,eax + mov eax,DWORD [4+ecx*4+esi] + adc edx,0 + mov DWORD [28+ecx*4+esp],ebp + mov ebp,edx + mul edi + add ebp,DWORD [36+ecx*4+esp] + lea ecx,[2+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0133rdmadd 
+ mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + mov ecx,DWORD [12+esp] + xor eax,eax + mov esi,DWORD [8+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,ebx + mov DWORD [36+ebx*4+esp],eax + je NEAR L$007common_tail + mov edi,DWORD [4+ecx*4+esi] + lea ecx,[1+ecx] + mov eax,edi + mov DWORD [12+esp],ecx + mul edi + add eax,DWORD [32+ecx*4+esp] + adc edx,0 + mov DWORD [32+ecx*4+esp],eax + xor ebp,ebp + cmp ecx,ebx + lea ecx,[1+ecx] + je NEAR L$014sqrlast + mov ebx,edx + shr edx,1 + and ebx,1 +align 16 +L$015sqradd: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ebp,[eax*1+eax] + adc edx,0 + shr eax,31 + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc eax,0 + add ebp,ebx + adc eax,0 + cmp ecx,DWORD [esp] + mov DWORD [28+ecx*4+esp],ebp + mov ebx,eax + jle NEAR L$015sqradd + mov ebp,edx + add edx,edx + shr ebp,31 + add edx,ebx + adc ebp,0 +L$014sqrlast: + mov edi,DWORD [20+esp] + mov esi,DWORD [16+esp] + imul edi,DWORD [32+esp] + add edx,DWORD [32+ecx*4+esp] + mov eax,DWORD [esi] + adc ebp,0 + mov DWORD [32+ecx*4+esp],edx + mov DWORD [36+ecx*4+esp],ebp + mul edi + add eax,DWORD [32+esp] + lea ebx,[ecx-1] + adc edx,0 + mov ecx,1 + mov eax,DWORD [4+esi] + jmp NEAR L$0133rdmadd +align 16 +L$007common_tail: + mov ebp,DWORD [16+esp] + mov edi,DWORD [4+esp] + lea esi,[32+esp] + mov eax,DWORD [esi] + mov ecx,ebx + xor edx,edx +align 16 +L$016sub: + sbb eax,DWORD [edx*4+ebp] + mov DWORD [edx*4+edi],eax + dec ecx + mov eax,DWORD [4+edx*4+esi] + lea edx,[1+edx] + jge NEAR L$016sub + sbb eax,0 + mov edx,-1 + xor edx,eax + jmp NEAR L$017copy +align 16 +L$017copy: + mov esi,DWORD [32+ebx*4+esp] + mov ebp,DWORD [ebx*4+edi] + mov DWORD [32+ebx*4+esp],ecx + and esi,eax + and ebp,edx + or ebp,esi + mov DWORD [ebx*4+edi],ebp + dec ebx + jge NEAR L$017copy + mov esp,DWORD [24+esp] + mov eax,1 +L$000just_leave: + pop edi + pop 
esi + pop ebx + pop ebp + ret +db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +db 111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 |