diff options
Diffstat (limited to 'deps/openssl/config/archs/BSD-x86/asm/crypto/bn')
-rw-r--r-- | deps/openssl/config/archs/BSD-x86/asm/crypto/bn/bn-586.s | 1525 | ||||
-rw-r--r-- | deps/openssl/config/archs/BSD-x86/asm/crypto/bn/co-586.s | 1249 | ||||
-rw-r--r-- | deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-gf2m.s | 343 | ||||
-rw-r--r-- | deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-mont.s | 475 |
4 files changed, 3592 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/bn-586.s b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/bn-586.s new file mode 100644 index 0000000000..80c8db4d29 --- /dev/null +++ b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/bn-586.s @@ -0,0 +1,1525 @@ +.text +.globl _bn_mul_add_words +.type _bn_mul_add_words,@function +.align 4 +_bn_mul_add_words: +L_bn_mul_add_words_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %eax + leal __GLOBAL_OFFSET_TABLE_+[.-L000PIC_me_up](%eax),%eax + movl _OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp L002maw_sse2_entry +.align 4,0x90 +L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz L004maw_sse2_exit +L002maw_sse2_entry: + testl $4294967288,%ecx + jnz L003maw_sse2_unrolled +.align 2,0x90 +L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L005maw_sse2_loop +L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 4,0x90 +L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz L006maw_finish +.align 4,0x90 +L007maw_loop: + # Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz L007maw_loop +L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz L008maw_finish2 + jmp L009maw_end +L008maw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_mul_words +.type _bn_mul_words,@function +.align 4 +_bn_mul_words: +L_bn_mul_words_begin: + call L010PIC_me_up +L010PIC_me_up: + popl %eax + leal __GLOBAL_OFFSET_TABLE_+[.-L010PIC_me_up](%eax),%eax + movl _OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 4,0x90 +L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 4,0x90 +L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz L013mw_finish +L014mw_loop: + # Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz L013mw_finish + jmp L014mw_loop +L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz L015mw_finish2 + jmp L016mw_end +L015mw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sqr_words +.type _bn_sqr_words,@function +.align 4 +_bn_sqr_words: +L_bn_sqr_words_begin: + call L017PIC_me_up +L017PIC_me_up: + popl %eax + leal __GLOBAL_OFFSET_TABLE_+[.-L017PIC_me_up](%eax),%eax + movl _OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 4,0x90 +L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz L019sqr_sse2_loop + emms + ret +.align 4,0x90 +L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz L020sw_finish +L021sw_loop: + # Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + # Round 4 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + # Round 8 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + # Round 12 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + # Round 16 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + # Round 20 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + # Round 24 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + # Round 28 + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz L021sw_loop +L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz L022sw_end + # Tail Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz L022sw_end + # Tail Round 1 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz L022sw_end + # Tail Round 2 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz L022sw_end + # Tail Round 3 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz L022sw_end + # Tail Round 4 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz L022sw_end + # Tail Round 5 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz L022sw_end + # Tail Round 6 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_div_words +.type _bn_div_words,@function +.align 4 +_bn_div_words: +L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.globl _bn_add_words +.type _bn_add_words,@function +.align 4 +_bn_add_words: +L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L023aw_finish +L024aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L024aw_loop +L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L025aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L025aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L025aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L025aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L025aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L025aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L025aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_words +.type _bn_sub_words,@function +.align 4 +_bn_sub_words: +L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L026aw_finish +L027aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L027aw_loop +L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L028aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L028aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L028aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L028aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L028aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L028aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L028aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_part_words +.type _bn_sub_part_words,@function +.align 4 +_bn_sub_part_words: +L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L029aw_finish +L030aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L030aw_loop +L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L031aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 1 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 2 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 3 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 4 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 5 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 6 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +L031aw_end: + cmpl $0,36(%esp) + je L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je L032pw_end + jge L033pw_pos + # pw_neg + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz L034pw_neg_finish +L035pw_neg_loop: + # dl<0 Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # dl<0 Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # dl<0 Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # dl<0 Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # dl<0 Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # dl<0 Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # dl<0 Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # dl<0 Round 7 + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L035pw_neg_loop +L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz L032pw_end + # dl<0 Tail Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L032pw_end + # dl<0 Tail Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L032pw_end + # dl<0 Tail Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L032pw_end + # dl<0 Tail Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L032pw_end + # dl<0 Tail Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L032pw_end + # dl<0 Tail Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L032pw_end + # dl<0 Tail Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp L032pw_end +L033pw_pos: + andl $4294967288,%ebp + jz L036pw_pos_finish +L037pw_pos_loop: + # dl>0 Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L038pw_nc0 + # dl>0 Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L039pw_nc1 + # dl>0 Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L040pw_nc2 + # dl>0 Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L041pw_nc3 + # dl>0 Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L042pw_nc4 + # dl>0 Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L043pw_nc5 + # dl>0 Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L044pw_nc6 + # dl>0 Round 7 + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc L045pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L037pw_pos_loop +L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz L032pw_end + # dl>0 Tail Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L046pw_tail_nc0 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L047pw_tail_nc1 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L048pw_tail_nc2 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L049pw_tail_nc3 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L050pw_tail_nc4 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L051pw_tail_nc5 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L052pw_tail_nc6 + movl $1,%eax + jmp L032pw_end +L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +L046pw_tail_nc0: + decl %ebp + jz L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L047pw_tail_nc1: + decl %ebp + jz L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L048pw_tail_nc2: + decl %ebp + jz L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L049pw_tail_nc3: + decl %ebp + jz L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L050pw_tail_nc4: + decl %ebp + jz L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L051pw_tail_nc5: + decl %ebp + jz L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L052pw_tail_nc6: +L054pw_nc_end: + movl $0,%eax +L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.comm _OPENSSL_ia32cap_P,16 diff --git a/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/co-586.s b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/co-586.s new file mode 100644 index 0000000000..0196f42b78 --- /dev/null +++ b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/co-586.s @@ -0,0 +1,1249 @@ +.text +.globl _bn_mul_comba8 +.type _bn_mul_comba8,@function +.align 4 +_bn_mul_comba8: +L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[4]*b[0] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[5]*b[0] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[1] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[4] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[6]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[1] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[2] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[4] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[5] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + # saved r[6] + # ################## Calculate word 7 + xorl %ebx,%ebx + # mul a[7]*b[0] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[2] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[3] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[4] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[5] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[6] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + # saved r[7] + # ################## Calculate word 8 + xorl %ecx,%ecx + # mul a[7]*b[1] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[3] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[4] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[5] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[6] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + # saved r[8] + # ################## Calculate word 9 + xorl %ebp,%ebp + # mul a[7]*b[2] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[4] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[5] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[6] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + # saved r[9] + # ################## Calculate word 10 + xorl %ebx,%ebx + # mul a[7]*b[3] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[5] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[6] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + # saved r[10] + # ################## Calculate word 11 + xorl %ecx,%ecx + # mul a[7]*b[4] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[6] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + # saved r[11] + # ################## Calculate word 12 + xorl %ebp,%ebp + # mul a[7]*b[5] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + # saved r[12] + # ################## Calculate word 13 + xorl %ebx,%ebx + # mul a[7]*b[6] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + # saved r[13] + # ################## Calculate word 14 + xorl %ecx,%ecx + # mul a[7]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + # saved r[14] + # save r[15] + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_mul_comba4 +.type _bn_mul_comba4,@function +.align 4 +_bn_mul_comba4: +L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + # saved r[6] + # save r[7] + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba8 +.type _bn_sqr_comba8,@function +.align 4 +_bn_sqr_comba8: +L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[4]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[5]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + # sqr a[4]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[6]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[5]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + # sqr a[4]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + # saved r[6] + # ############### Calculate word 7 + xorl %ebx,%ebx + # sqr a[7]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[6]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + # sqr a[5]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + # sqr a[4]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + # saved r[7] + # ############### Calculate word 8 + xorl %ecx,%ecx + # sqr a[7]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[6]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + # sqr a[5]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + # sqr a[4]*a[4] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + # saved r[8] + # ############### Calculate word 9 + xorl %ebp,%ebp + # sqr a[7]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + # sqr a[6]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + # sqr a[5]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + # saved r[9] + # ############### Calculate word 10 + xorl %ebx,%ebx + # sqr a[7]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + # sqr a[6]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + # sqr a[5]*a[5] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + # saved r[10] + # ############### Calculate word 11 + xorl %ecx,%ecx + # sqr a[7]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + # sqr a[6]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + # saved r[11] + # ############### Calculate word 12 + xorl %ebp,%ebp + # sqr a[7]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + # sqr a[6]*a[6] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + # saved r[12] + # ############### Calculate word 13 + xorl %ebx,%ebx + # sqr a[7]*a[6] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + # saved r[13] + # ############### Calculate word 14 + xorl %ecx,%ecx + # sqr a[7]*a[7] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + # saved r[14] + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba4 +.type _bn_sqr_comba4,@function +.align 4 +_bn_sqr_comba4: +L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + # saved r[6] + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret diff --git a/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-gf2m.s b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-gf2m.s new file mode 100644 index 0000000000..0999445f43 --- /dev/null +++ b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-gf2m.s @@ -0,0 +1,343 @@ +.text +.type __mul_1x1_mmx,@function +.align 4 +__mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl %ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.type __mul_1x1_ialu,@function +.align 4 +__mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.globl _bn_GF2m_mul_2x2 +.type _bn_GF2m_mul_2x2,@function +.align 4 +_bn_GF2m_mul_2x2: +L_bn_GF2m_mul_2x2_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %edx + leal __GLOBAL_OFFSET_TABLE_+[.-L000PIC_me_up](%edx),%edx + movl _OPENSSL_ia32cap_P@GOT(%edx),%edx + movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz L001ialu + testl $16777216,%eax + jz L002mmx + testl $2,%edx + jz L002mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 4,0x90 +L002mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm7 + movl 28(%esp),%eax + movl 36(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call __mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 4,0x90 +L001ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call __mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 56(%esp),%ebx + call __mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call __mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm _OPENSSL_ia32cap_P,16 diff --git a/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-mont.s b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-mont.s new file mode 100644 index 0000000000..2612cf541b --- /dev/null +++ b/deps/openssl/config/archs/BSD-x86/asm/crypto/bn/x86-mont.s @@ -0,0 +1,475 @@ +.text +.globl _bn_mul_mont +.type _bn_mul_mont,@function +.align 4 +_bn_mul_mont: +L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%ebp + negl %edi + movl %ebp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%ebp + xorl %ebp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax + andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk + jmp L002page_walk_done +.align 4,0x90 +L001page_walk: + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk +L002page_walk_done: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%ebp + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %ebp,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %edx,24(%esp) + call L003PIC_me_up +L003PIC_me_up: + popl %eax + leal __GLOBAL_OFFSET_TABLE_+[.-L003PIC_me_up](%eax),%eax + movl _OPENSSL_ia32cap_P@GOT(%eax),%eax + btl $26,(%eax) + jnc L004non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 4,0x90 +L0051st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl L0051st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +L006outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +L007inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz L007inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle L006outer + emms + jmp L008common_tail +.align 4,0x90 +L004non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz L009bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 4,0x90 +L010mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L010mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp L0112ndmadd +.align 4,0x90 +L0121stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L0121stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 4,0x90 +L0112ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0112ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je L008common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp L0121stmadd +.align 4,0x90 +L009bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 4,0x90 +L013sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl L013sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 4,0x90 +L0143rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0143rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je L008common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je L015sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 4,0x90 +L016sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle L016sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +L015sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp L0143rdmadd +.align 4,0x90 +L008common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 4,0x90 +L017sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge L017sub + sbbl $0,%eax + movl $-1,%edx + xorl %eax,%edx + jmp L018copy +.align 4,0x90 +L018copy: + movl 32(%esp,%ebx,4),%esi + movl (%edi,%ebx,4),%ebp + movl %ecx,32(%esp,%ebx,4) + andl %eax,%esi + andl %edx,%ebp + orl %esi,%ebp + movl %ebp,(%edi,%ebx,4) + decl %ebx + jge L018copy + movl 24(%esp),%esp + movl $1,%eax +L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.comm _OPENSSL_ia32cap_P,16 |