%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif
;extern _OPENSSL_ia32cap_P
; __mul_1x1_mmx: 64-bit carry-less (GF(2)[x]) product of the 32-bit
; polynomials in eax and ebx, returned in mm0.  A table of eax times
; 0..7 is built on the stack (the two top bits of eax are folded in
; separately via mm4/mm5) and ebx is consumed three bits at a time.
align 16
__mul_1x1_mmx:
        sub esp,36
        mov ecx,eax
        lea edx,[eax*1+eax]
        and ecx,1073741823
        lea ebp,[edx*1+edx]
        mov DWORD [esp],0
        and edx,2147483647
        movd mm2,eax
        movd mm3,ebx
        mov DWORD [4+esp],ecx
        xor ecx,edx
        pxor mm5,mm5
        pxor mm4,mm4
        mov DWORD [8+esp],edx
        xor edx,ebp
        mov DWORD [12+esp],ecx
        pcmpgtd mm5,mm2
        paddd mm2,mm2
        xor ecx,edx
        mov DWORD [16+esp],ebp
        xor ebp,edx
        pand mm5,mm3
        pcmpgtd mm4,mm2
        mov DWORD [20+esp],ecx
        xor ebp,ecx
        psllq mm5,31
        pand mm4,mm3
        mov DWORD [24+esp],edx
        mov esi,7
        mov DWORD [28+esp],ebp
        mov ebp,esi
        and esi,ebx
        shr ebx,3
        mov edi,ebp
        psllq mm4,30
        and edi,ebx
        shr ebx,3
        movd mm0,DWORD [esi*4+esp]
        mov esi,ebp
        and esi,ebx
        shr ebx,3
        movd mm2,DWORD [edi*4+esp]
        mov edi,ebp
        psllq mm2,3
        and edi,ebx
        shr ebx,3
        pxor mm0,mm2
        movd mm1,DWORD [esi*4+esp]
        mov esi,ebp
        psllq mm1,6
        and esi,ebx
        shr ebx,3
        pxor mm0,mm1
        movd mm2,DWORD [edi*4+esp]
        mov edi,ebp
        psllq mm2,9
        and edi,ebx
        shr ebx,3
        pxor mm0,mm2
        movd mm1,DWORD [esi*4+esp]
        mov esi,ebp
        psllq mm1,12
        and esi,ebx
        shr ebx,3
        pxor mm0,mm1
        movd mm2,DWORD [edi*4+esp]
        mov edi,ebp
        psllq mm2,15
        and edi,ebx
        shr ebx,3
        pxor mm0,mm2
        movd mm1,DWORD [esi*4+esp]
        mov esi,ebp
        psllq mm1,18
        and esi,ebx
        shr ebx,3
        pxor mm0,mm1
        movd mm2,DWORD [edi*4+esp]
        mov edi,ebp
        psllq mm2,21
        and edi,ebx
        shr ebx,3
        pxor mm0,mm2
        movd mm1,DWORD [esi*4+esp]
        mov esi,ebp
        psllq mm1,24
        and esi,ebx
        shr ebx,3
        pxor mm0,mm1
        movd mm2,DWORD [edi*4+esp]
        pxor mm0,mm4
        psllq mm2,27
        pxor mm0,mm2
        movd mm1,DWORD [esi*4+esp]
        pxor mm0,mm5
        psllq mm1,30
        add esp,36
        pxor mm0,mm1
        ret
; __mul_1x1_ialu: same 32x32->64 carry-less multiplication as above,
; using only integer instructions; the product is returned in edx:eax.
align 16
__mul_1x1_ialu:
        sub esp,36
        mov ecx,eax
        lea edx,[eax*1+eax]
        lea ebp,[eax*4]
        and ecx,1073741823
        lea edi,[eax*1+eax]
        sar eax,31
        mov DWORD [esp],0
        and edx,2147483647
        mov DWORD [4+esp],ecx
        xor ecx,edx
        mov DWORD [8+esp],edx
        xor edx,ebp
        mov DWORD [12+esp],ecx
        xor ecx,edx
        mov DWORD [16+esp],ebp
        xor ebp,edx
        mov DWORD [20+esp],ecx
        xor ebp,ecx
        sar edi,31
        and eax,ebx
        mov DWORD [24+esp],edx
        and edi,ebx
        mov DWORD [28+esp],ebp
        mov edx,eax
        shl eax,31
        mov ecx,edi
        shr edx,1
        mov esi,7
        shl edi,30
        and esi,ebx
        shr ecx,2
        xor eax,edi
        shr ebx,3
        mov edi,7
        and edi,ebx
        shr ebx,3
        xor edx,ecx
        xor eax,DWORD [esi*4+esp]
        mov esi,7
        and esi,ebx
        shr ebx,3
        mov ebp,DWORD [edi*4+esp]
        mov edi,7
        mov ecx,ebp
        shl ebp,3
        and edi,ebx
        shr ecx,29
        xor eax,ebp
        shr ebx,3
        xor edx,ecx
        mov ecx,DWORD [esi*4+esp]
        mov esi,7
        mov ebp,ecx
        shl ecx,6
        and esi,ebx
        shr ebp,26
        xor eax,ecx
        shr ebx,3
        xor edx,ebp
        mov ebp,DWORD [edi*4+esp]
        mov edi,7
        mov ecx,ebp
        shl ebp,9
        and edi,ebx
        shr ecx,23
        xor eax,ebp
        shr ebx,3
        xor edx,ecx
        mov ecx,DWORD [esi*4+esp]
        mov esi,7
        mov ebp,ecx
        shl ecx,12
        and esi,ebx
        shr ebp,20
        xor eax,ecx
        shr ebx,3
        xor edx,ebp
        mov ebp,DWORD [edi*4+esp]
        mov edi,7
        mov ecx,ebp
        shl ebp,15
        and edi,ebx
        shr ecx,17
        xor eax,ebp
        shr ebx,3
        xor edx,ecx
        mov ecx,DWORD [esi*4+esp]
        mov esi,7
        mov ebp,ecx
        shl ecx,18
        and esi,ebx
        shr ebp,14
        xor eax,ecx
        shr ebx,3
        xor edx,ebp
        mov ebp,DWORD [edi*4+esp]
        mov edi,7
        mov ecx,ebp
        shl ebp,21
        and edi,ebx
        shr ecx,11
        xor eax,ebp
        shr ebx,3
        xor edx,ecx
        mov ecx,DWORD [esi*4+esp]
        mov esi,7
        mov ebp,ecx
        shl ecx,24
        and esi,ebx
        shr ebp,8
        xor eax,ecx
        shr ebx,3
        xor edx,ebp
        mov ebp,DWORD [edi*4+esp]
        mov ecx,ebp
        shl ebp,27
        mov edi,DWORD [esi*4+esp]
        shr ecx,5
        mov esi,edi
        xor eax,ebp
        shl edi,30
        xor edx,ecx
        shr esi,2
        xor eax,edi
        xor edx,esi
        add esp,36
        ret
; void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
;                      BN_ULONG b1, BN_ULONG b0);
; 128-bit product of the 64-bit GF(2)[x] polynomials {a1,a0} and
; {b1,b0}, written to r[0..3].  PCLMULQDQ is used when available;
; otherwise the result is assembled from three 1x1 multiplications
; (Karatsuba) on either the MMX or the plain-ALU path.
global _bn_GF2m_mul_2x2
align 16
_bn_GF2m_mul_2x2:
L$_bn_GF2m_mul_2x2_begin:
        lea edx,[_OPENSSL_ia32cap_P]
        mov eax,DWORD [edx]
        mov edx,DWORD [4+edx]
        test eax,8388608                ; check MMX bit
        jz NEAR L$000ialu
        test eax,16777216               ; check FXSR bit
        jz NEAR L$001mmx
        test edx,2                      ; check PCLMULQDQ bit
        jz NEAR L$001mmx
        movups xmm0,[8+esp]             ; load a1,a0,b1,b0
        shufps xmm0,xmm0,177            ; qword 0 = a1:a0, qword 1 = b1:b0
db 102,15,58,68,192,1                   ; pclmulqdq xmm0,xmm0,1
        mov eax,DWORD [4+esp]           ; r
        movups [eax],xmm0
        ret
align 16
L$001mmx:
        push ebp
        push ebx
        push esi
        push edi
        mov eax,DWORD [24+esp]
        mov ebx,DWORD [32+esp]
        call __mul_1x1_mmx              ; a1*b1
        movq mm7,mm0
        mov eax,DWORD [28+esp]
        mov ebx,DWORD [36+esp]
        call __mul_1x1_mmx              ; a0*b0
        movq mm6,mm0
        mov eax,DWORD [24+esp]
        mov ebx,DWORD [32+esp]
        xor eax,DWORD [28+esp]
        xor ebx,DWORD [36+esp]
        call __mul_1x1_mmx              ; (a1+a0)*(b1+b0)
        pxor mm0,mm7
        mov eax,DWORD [20+esp]          ; r
        pxor mm0,mm6                    ; mm0 = Karatsuba middle term
        movq mm2,mm0
        psllq mm0,32
        pop edi
        psrlq mm2,32
        pop esi
        pxor mm0,mm6
        pop ebx
        pxor mm2,mm7
        movq [eax],mm0
        pop ebp
        movq [8+eax],mm2
        emms
        ret
align 16
L$000ialu:
        push ebp
        push ebx
        push esi
        push edi
        sub esp,20
        mov eax,DWORD [44+esp]
        mov ebx,DWORD [52+esp]
        call __mul_1x1_ialu             ; a1*b1
        mov DWORD [8+esp],eax
        mov DWORD [12+esp],edx
        mov eax,DWORD [48+esp]
        mov ebx,DWORD [56+esp]
        call __mul_1x1_ialu             ; a0*b0
        mov DWORD [esp],eax
        mov DWORD [4+esp],edx
        mov eax,DWORD [44+esp]
        mov ebx,DWORD [52+esp]
        xor eax,DWORD [48+esp]
        xor ebx,DWORD [56+esp]
        call __mul_1x1_ialu             ; (a1+a0)*(b1+b0)
        mov ebp,DWORD [40+esp]          ; r
        mov ebx,DWORD [esp]
        mov ecx,DWORD [4+esp]
        mov edi,DWORD [8+esp]
        mov esi,DWORD [12+esp]
        xor eax,edx
        xor edx,ecx
        xor eax,ebx
        mov DWORD [ebp],ebx
        xor edx,edi
        mov DWORD [12+ebp],esi
        xor eax,esi
        add esp,20
        xor edx,esi
        pop edi
        xor eax,edx
        pop esi
        mov DWORD [8+ebp],edx
        pop ebx
        mov DWORD [4+ebp],eax
        pop ebp
        ret
; "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
db 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db 62,0
segment .bss
common _OPENSSL_ia32cap_P 16