diff options
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm')
-rw-r--r-- | deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm new file mode 100644 index 0000000000..709f4a9e50 --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN32/asm_avx2/crypto/bn/x86-gf2m.asm @@ -0,0 +1,345 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +$@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +align 16 +__mul_1x1_mmx: + sub esp,36 + mov ecx,eax + lea edx,[eax*1+eax] + and ecx,1073741823 + lea ebp,[edx*1+edx] + mov DWORD [esp],0 + and edx,2147483647 + movd mm2,eax + movd mm3,ebx + mov DWORD [4+esp],ecx + xor ecx,edx + pxor mm5,mm5 + pxor mm4,mm4 + mov DWORD [8+esp],edx + xor edx,ebp + mov DWORD [12+esp],ecx + pcmpgtd mm5,mm2 + paddd mm2,mm2 + xor ecx,edx + mov DWORD [16+esp],ebp + xor ebp,edx + pand mm5,mm3 + pcmpgtd mm4,mm2 + mov DWORD [20+esp],ecx + xor ebp,ecx + psllq mm5,31 + pand mm4,mm3 + mov DWORD [24+esp],edx + mov esi,7 + mov DWORD [28+esp],ebp + mov ebp,esi + and esi,ebx + shr ebx,3 + mov edi,ebp + psllq mm4,30 + and edi,ebx + shr ebx,3 + movd mm0,DWORD [esi*4+esp] + mov esi,ebp + and esi,ebx + shr ebx,3 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,3 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,6 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,9 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,12 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,15 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,18 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + mov edi,ebp + psllq mm2,21 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + mov esi,ebp + psllq mm1,24 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD [edi*4+esp] + pxor mm0,mm4 + psllq mm2,27 + pxor mm0,mm2 + movd mm1,DWORD [esi*4+esp] + pxor mm0,mm5 + psllq mm1,30 + add esp,36 + pxor mm0,mm1 + ret +align 16 +__mul_1x1_ialu: + sub esp,36 + mov ecx,eax + lea edx,[eax*1+eax] + lea ebp,[eax*4] + and ecx,1073741823 + lea edi,[eax*1+eax] + sar eax,31 + mov DWORD [esp],0 + and edx,2147483647 + mov DWORD [4+esp],ecx + xor ecx,edx + mov DWORD [8+esp],edx + xor edx,ebp + mov DWORD [12+esp],ecx + xor ecx,edx + mov DWORD [16+esp],ebp + xor ebp,edx + mov DWORD [20+esp],ecx + xor ebp,ecx + sar edi,31 + and eax,ebx + mov DWORD [24+esp],edx + and edi,ebx + mov DWORD [28+esp],ebp + mov edx,eax + shl eax,31 + mov ecx,edi + shr edx,1 + mov esi,7 + shl edi,30 + and esi,ebx + shr ecx,2 + xor eax,edi + shr ebx,3 + mov edi,7 + and edi,ebx + shr ebx,3 + xor edx,ecx + xor eax,DWORD [esi*4+esp] + mov esi,7 + and esi,ebx + shr ebx,3 + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,3 + and edi,ebx + shr ecx,29 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,6 + and esi,ebx + shr ebp,26 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,9 + and edi,ebx + shr ecx,23 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,12 + and esi,ebx + shr ebp,20 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,15 + and edi,ebx + shr ecx,17 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,18 + and esi,ebx + shr ebp,14 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,21 + and edi,ebx + shr ecx,11 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,24 + and esi,ebx + shr ebp,8 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD [edi*4+esp] + mov ecx,ebp + shl ebp,27 + mov edi,DWORD [esi*4+esp] + shr ecx,5 + mov esi,edi + xor eax,ebp + shl edi,30 + xor edx,ecx + shr esi,2 + xor eax,edi + xor edx,esi + add esp,36 + ret +global _bn_GF2m_mul_2x2 +align 16 +_bn_GF2m_mul_2x2: +L$_bn_GF2m_mul_2x2_begin: + lea edx,[_OPENSSL_ia32cap_P] + mov eax,DWORD [edx] + mov edx,DWORD [4+edx] + test eax,8388608 + jz NEAR L$000ialu + test eax,16777216 + jz NEAR L$001mmx + test edx,2 + jz NEAR L$001mmx + movups xmm0,[8+esp] + shufps xmm0,xmm0,177 +db 102,15,58,68,192,1 + mov eax,DWORD [4+esp] + movups [eax],xmm0 + ret +align 16 +L$001mmx: + push ebp + push ebx + push esi + push edi + mov eax,DWORD [24+esp] + mov ebx,DWORD [32+esp] + call __mul_1x1_mmx + movq mm7,mm0 + mov eax,DWORD [28+esp] + mov ebx,DWORD [36+esp] + call __mul_1x1_mmx + movq mm6,mm0 + mov eax,DWORD [24+esp] + mov ebx,DWORD [32+esp] + xor eax,DWORD [28+esp] + xor ebx,DWORD [36+esp] + call __mul_1x1_mmx + pxor mm0,mm7 + mov eax,DWORD [20+esp] + pxor mm0,mm6 + movq mm2,mm0 + psllq mm0,32 + pop edi + psrlq mm2,32 + pop esi + pxor mm0,mm6 + pop ebx + pxor mm2,mm7 + movq [eax],mm0 + pop ebp + movq [8+eax],mm2 + emms + ret +align 16 +L$000ialu: + push ebp + push ebx + push esi + push edi + sub esp,20 + mov eax,DWORD [44+esp] + mov ebx,DWORD [52+esp] + call __mul_1x1_ialu + mov DWORD [8+esp],eax + mov DWORD [12+esp],edx + mov eax,DWORD [48+esp] + mov ebx,DWORD [56+esp] + call __mul_1x1_ialu + mov DWORD [esp],eax + mov DWORD [4+esp],edx + mov eax,DWORD [44+esp] + mov ebx,DWORD [52+esp] + xor eax,DWORD [48+esp] + xor ebx,DWORD [56+esp] + call __mul_1x1_ialu + mov ebp,DWORD [40+esp] + mov ebx,DWORD [esp] + mov ecx,DWORD [4+esp] + mov edi,DWORD [8+esp] + mov esi,DWORD [12+esp] + xor eax,edx + xor edx,ecx + xor eax,ebx + mov DWORD [ebp],ebx + xor edx,edi + mov DWORD [12+ebp],esi + xor eax,esi + add esp,20 + xor edx,esi + pop edi + xor eax,edx + pop esi + mov DWORD [8+ebp],edx + pop ebx + mov DWORD [4+ebp],eax + pop ebp + ret +db 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +db 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 |