Diffstat (limited to 'deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s')
-rw-r--r-- | deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s | 115 |
1 file changed, 61 insertions, 54 deletions
diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
index accec0e519..7bc58d24e0 100644
--- a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
+++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
@@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
 	jl	L000just_leave
 	leal	20(%esp),%esi
 	leal	24(%esp),%edx
-	movl	%esp,%ebp
 	addl	$2,%edi
 	negl	%edi
-	leal	-32(%esp,%edi,4),%esp
+	leal	-32(%esp,%edi,4),%ebp
 	negl	%edi
-	movl	%esp,%eax
+	movl	%ebp,%eax
 	subl	%edx,%eax
 	andl	$2047,%eax
-	subl	%eax,%esp
-	xorl	%esp,%edx
+	subl	%eax,%ebp
+	xorl	%ebp,%edx
 	andl	$2048,%edx
 	xorl	$2048,%edx
-	subl	%edx,%esp
-	andl	$-64,%esp
-	movl	%ebp,%eax
-	subl	%esp,%eax
+	subl	%edx,%ebp
+	andl	$-64,%ebp
+	movl	%esp,%eax
+	subl	%ebp,%eax
 	andl	$-4096,%eax
+	movl	%esp,%edx
+	leal	(%ebp,%eax,1),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	L001page_walk
+	jmp	L002page_walk_done
+.align	4,0x90
 L001page_walk:
-	movl	(%esp,%eax,1),%edx
-	subl	$4096,%eax
-.byte	46
-	jnc	L001page_walk
+	leal	-4096(%esp),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	L001page_walk
+L002page_walk_done:
 	movl	(%esi),%eax
 	movl	4(%esi),%ebx
 	movl	8(%esi),%ecx
-	movl	12(%esi),%edx
+	movl	12(%esi),%ebp
 	movl	16(%esi),%esi
 	movl	(%esi),%esi
 	movl	%eax,4(%esp)
 	movl	%ebx,8(%esp)
 	movl	%ecx,12(%esp)
-	movl	%edx,16(%esp)
+	movl	%ebp,16(%esp)
 	movl	%esi,20(%esp)
 	leal	-3(%edi),%ebx
-	movl	%ebp,24(%esp)
-	call	L002PIC_me_up
-L002PIC_me_up:
+	movl	%edx,24(%esp)
+	call	L003PIC_me_up
+L003PIC_me_up:
 	popl	%eax
-	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
+	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
 	btl	$26,(%eax)
-	jnc	L003non_sse2
+	jnc	L004non_sse2
 	movl	$-1,%eax
 	movd	%eax,%mm7
 	movl	8(%esp),%esi
@@ -78,7 +85,7 @@ L002PIC_me_up:
 	psrlq	$32,%mm3
 	incl	%ecx
 .align	4,0x90
-L0041st:
+L0051st:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -93,7 +100,7 @@ L0041st:
 	psrlq	$32,%mm3
 	leal	1(%ecx),%ecx
 	cmpl	%ebx,%ecx
-	jl	L0041st
+	jl	L0051st
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -107,7 +114,7 @@ L0041st:
 	paddq	%mm2,%mm3
 	movq	%mm3,32(%esp,%ebx,4)
 	incl	%edx
-L005outer:
+L006outer:
 	xorl	%ecx,%ecx
 	movd	(%edi,%edx,4),%mm4
 	movd	(%esi),%mm5
@@ -129,7 +136,7 @@ L005outer:
 	paddq	%mm6,%mm2
 	incl	%ecx
 	decl	%ebx
-L006inner:
+L007inner:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -146,7 +153,7 @@ L006inner:
 	paddq	%mm6,%mm2
 	decl	%ebx
 	leal	1(%ecx),%ecx
-	jnz	L006inner
+	jnz	L007inner
 	movl	%ecx,%ebx
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
@@ -164,11 +171,11 @@ L006inner:
 	movq	%mm3,32(%esp,%ebx,4)
 	leal	1(%edx),%edx
 	cmpl	%ebx,%edx
-	jle	L005outer
+	jle	L006outer
 	emms
-	jmp	L007common_tail
+	jmp	L008common_tail
 .align	4,0x90
-L003non_sse2:
+L004non_sse2:
 	movl	8(%esp),%esi
 	leal	1(%ebx),%ebp
 	movl	12(%esp),%edi
@@ -179,12 +186,12 @@ L003non_sse2:
 	leal	4(%edi,%ebx,4),%eax
 	orl	%edx,%ebp
 	movl	(%edi),%edi
-	jz	L008bn_sqr_mont
+	jz	L009bn_sqr_mont
 	movl	%eax,28(%esp)
 	movl	(%esi),%eax
 	xorl	%edx,%edx
 .align	4,0x90
-L009mull:
+L010mull:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	%eax,%ebp
@@ -193,7 +200,7 @@ L009mull:
 	movl	(%esi,%ecx,4),%eax
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L009mull
+	jl	L010mull
 	movl	%edx,%ebp
 	mull	%edi
 	movl	20(%esp),%edi
@@ -211,9 +218,9 @@ L009mull:
 	movl	4(%esi),%eax
 	adcl	$0,%edx
 	incl	%ecx
-	jmp	L0102ndmadd
+	jmp	L0112ndmadd
 .align	4,0x90
-L0111stmadd:
+L0121stmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -224,7 +231,7 @@ L0111stmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L0111stmadd
+	jl	L0121stmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%eax
@@ -247,7 +254,7 @@ L0111stmadd:
 	adcl	$0,%edx
 	movl	$1,%ecx
 .align	4,0x90
-L0102ndmadd:
+L0112ndmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -258,7 +265,7 @@ L0102ndmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	L0102ndmadd
+	jl	L0112ndmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -274,16 +281,16 @@ L0102ndmadd:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	28(%esp),%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	L007common_tail
+	je	L008common_tail
 	movl	(%ecx),%edi
 	movl	8(%esp),%esi
 	movl	%ecx,12(%esp)
 	xorl	%ecx,%ecx
 	xorl	%edx,%edx
 	movl	(%esi),%eax
-	jmp	L0111stmadd
+	jmp	L0121stmadd
 .align	4,0x90
-L008bn_sqr_mont:
+L009bn_sqr_mont:
 	movl	%ebx,(%esp)
 	movl	%ecx,12(%esp)
 	movl	%edi,%eax
@@ -294,7 +301,7 @@ L008bn_sqr_mont:
 	andl	$1,%ebx
 	incl	%ecx
 .align	4,0x90
-L012sqr:
+L013sqr:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -306,7 +313,7 @@ L012sqr:
 	cmpl	(%esp),%ecx
 	movl	%eax,%ebx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	L012sqr
+	jl	L013sqr
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -330,7 +337,7 @@ L012sqr:
 	movl	4(%esi),%eax
 	movl	$1,%ecx
 .align	4,0x90
-L0133rdmadd:
+L0143rdmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -349,7 +356,7 @@ L0133rdmadd:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	L0133rdmadd
+	jl	L0143rdmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -365,7 +372,7 @@ L0133rdmadd:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	%ebx,%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	L007common_tail
+	je	L008common_tail
 	movl	4(%esi,%ecx,4),%edi
 	leal	1(%ecx),%ecx
 	movl	%edi,%eax
@@ -377,12 +384,12 @@ L0133rdmadd:
 	xorl	%ebp,%ebp
 	cmpl	%ebx,%ecx
 	leal	1(%ecx),%ecx
-	je	L014sqrlast
+	je	L015sqrlast
 	movl	%edx,%ebx
 	shrl	$1,%edx
 	andl	$1,%ebx
 .align	4,0x90
-L015sqradd:
+L016sqradd:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -398,13 +405,13 @@ L015sqradd:
 	cmpl	(%esp),%ecx
 	movl	%ebp,28(%esp,%ecx,4)
 	movl	%eax,%ebx
-	jle	L015sqradd
+	jle	L016sqradd
 	movl	%edx,%ebp
 	addl	%edx,%edx
 	shrl	$31,%ebp
 	addl	%ebx,%edx
 	adcl	$0,%ebp
-L014sqrlast:
+L015sqrlast:
 	movl	20(%esp),%edi
 	movl	16(%esp),%esi
 	imull	32(%esp),%edi
@@ -419,9 +426,9 @@ L014sqrlast:
 	adcl	$0,%edx
 	movl	$1,%ecx
 	movl	4(%esi),%eax
-	jmp	L0133rdmadd
+	jmp	L0143rdmadd
 .align	4,0x90
-L007common_tail:
+L008common_tail:
 	movl	16(%esp),%ebp
 	movl	4(%esp),%edi
 	leal	32(%esp),%esi
@@ -429,13 +436,13 @@ L007common_tail:
 	movl	%ebx,%ecx
 	xorl	%edx,%edx
 .align	4,0x90
-L016sub:
+L017sub:
 	sbbl	(%ebp,%edx,4),%eax
 	movl	%eax,(%edi,%edx,4)
 	decl	%ecx
 	movl	4(%esi,%edx,4),%eax
 	leal	1(%edx),%edx
-	jge	L016sub
+	jge	L017sub
 	sbbl	$0,%eax
 	andl	%eax,%esi
 	notl	%eax
@@ -443,12 +450,12 @@ L016sub:
 	andl	%eax,%ebp
 	orl	%ebp,%esi
 .align	4,0x90
-L017copy:
+L018copy:
 	movl	(%esi,%ebx,4),%eax
 	movl	%eax,(%edi,%ebx,4)
 	movl	%ecx,32(%esp,%ebx,4)
 	decl	%ebx
-	jge	L017copy
+	jge	L018copy
 	movl	24(%esp),%esp
 	movl	$1,%eax
 L000just_leave:
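The substance of this change is in the first hunk: the frame base now lives in %ebp, the caller's %esp is preserved in %edx (and later at 24(%esp) for the final restore), and the new L001page_walk loop lowers %esp one 4096-byte page at a time, reading a word on each page until it reaches the frame base, so every page of the freshly reserved stack area is touched before bn_mul_mont stores into it. The C fragment below is a minimal sketch of that probing pattern only; it is not part of the generated assembly, and the names page_walk, PAGE_SIZE, frame_base and old_sp are hypothetical, chosen just to mirror the registers involved.

/*
 * Illustrative sketch (assumption: not part of this file) of the probe
 * performed by the new L001page_walk loop.
 */
#include <stddef.h>

#define PAGE_SIZE 4096u  /* hypothetical constant mirroring the -4096 step */

static void page_walk(volatile unsigned char *frame_base,  /* plays the role of %ebp */
                      volatile unsigned char *old_sp)      /* plays the role of the saved %esp */
{
    /* like "leal (%ebp,%eax,1),%esp": start on the highest page-aligned
     * offset above the frame base that is still at or below the old SP */
    volatile unsigned char *p =
        frame_base + (((size_t)(old_sp - frame_base)) & ~(size_t)(PAGE_SIZE - 1));

    for (;;) {
        (void)*p;                /* like "movl (%esp),%eax": fault the page in */
        if (p <= frame_base)     /* like "cmpl %ebp,%esp; ja L001page_walk" */
            break;
        p -= PAGE_SIZE;          /* like "leal -4096(%esp),%esp" */
    }
}

In the assembly, the equivalent of frame_base is the %ebp value computed from the requested size, and old_sp is the caller's %esp, which the patch saves in %edx and stores at 24(%esp) so the epilogue's "movl 24(%esp),%esp" can restore it.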