From c17a1fedd8cea4b0c4c265adfcb9048b0918788b Mon Sep 17 00:00:00 2001 From: Shigeki Ohtsu Date: Thu, 22 Sep 2016 21:07:52 +0900 Subject: deps: update openssl asm and asm_obsolete files Regenerate asm files with Makefile and CC=gcc and ASM=gcc where gcc-5.4.0. Also asm files in asm_obsolete dir to support old compiler and assembler are regenerated without CC and ASM envs. PR-URL: https://github.com/nodejs/node/pull/8714 Reviewed-By: Fedor Indutny --- deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S | 24 +-- deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s | 155 ++++++++------ deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s | 204 +++++++++++------- .../asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s | 189 +++++++++-------- deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s | 4 +- deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s | 155 ++++++++------ deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s | 204 +++++++++++------- .../asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s | 189 +++++++++-------- deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s | 4 +- deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm | 185 ++++++++++------- .../openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm | 227 +++++++++++++-------- .../asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm | 189 +++++++++-------- .../openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm | 4 +- deps/openssl/asm/x86-elf-gas/bn/x86-mont.s | 109 +++++----- deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s | 115 ++++++----- deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm | 109 +++++----- .../asm_obsolete/arm-void-gas/aes/bsaes-armv7.S | 24 +-- .../asm_obsolete/x64-elf-gas/bn/x86_64-mont.s | 127 +++++++----- .../asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s | 134 +++++++----- .../x64-elf-gas/ec/ecp_nistz256-x86_64.s | 119 +++++------ .../asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s | 4 +- .../asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s | 127 +++++++----- .../asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s | 134 +++++++----- .../x64-macosx-gas/ec/ecp_nistz256-x86_64.s | 119 +++++------ .../asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s | 4 +- .../asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm | 154 ++++++++------ .../x64-win32-masm/bn/x86_64-mont5.asm | 153 +++++++++----- .../x64-win32-masm/ec/ecp_nistz256-x86_64.asm | 119 +++++------ .../x64-win32-masm/sha/sha1-x86_64.asm | 4 +- .../openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s | 109 +++++----- .../asm_obsolete/x86-macosx-gas/bn/x86-mont.s | 115 ++++++----- .../asm_obsolete/x86-win32-masm/bn/x86-mont.asm | 109 +++++----- 32 files changed, 2040 insertions(+), 1581 deletions(-) (limited to 'deps/openssl') diff --git a/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S index 9738ed5023..449e7a442e 100644 --- a/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S +++ b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S @@ -1816,8 +1816,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - veor q4, q4, q12 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1853,8 +1851,6 @@ bsaes_xts_encrypt: .align 5 .Lxts_enc_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - veor q3, q3, q11 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1883,8 +1879,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - veor q2, q2, q10 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1910,8 +1904,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - veor q1, q1, q9 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1936,8 +1928,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - veor q0, q0, q8 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1960,7 +1950,7 @@ bsaes_xts_encrypt: .align 4 .Lxts_enc_1: mov r0, sp - veor q0, q8 + veor q0, q0, q8 mov r1, sp vst1.8 {q0}, [sp,:128] mov r2, r10 @@ -2346,8 +2336,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - veor q3, q3, q11 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2376,8 +2364,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - veor q2, q2, q10 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2403,8 +2389,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - veor q1, q1, q9 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2429,8 +2413,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - veor q0, q0, q8 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2453,12 +2435,12 @@ bsaes_xts_decrypt: .align 4 .Lxts_dec_1: mov r0, sp - veor q0, q8 + veor q0, q0, q8 mov r1, sp vst1.8 {q0}, [sp,:128] + mov r5, r2 @ preserve magic mov r2, r10 mov r4, r3 @ preserve fp - mov r5, r2 @ preserve magic bl AES_decrypt diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s index b4fb5fe7e9..21175f570d 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s @@ -6,6 +6,8 @@ .type bn_mul_mont,@function .align 16 bn_mul_mont: + movl %r9d,%r9d + movq %rsp,%rax testl $3,%r9d jnz .Lmul_enter cmpl $8,%r9d @@ -26,29 +28,36 @@ bn_mul_mont: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 2(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -.Lmul_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.align 16 .Lmul_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc .Lmul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + movq %rax,8(%rsp,%r9,8) +.Lmul_body: movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -216,19 +225,21 @@ bn_mul_mont: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size bn_mul_mont,.-bn_mul_mont .type bn_mul4x_mont,@function .align 16 bn_mul4x_mont: + movl %r9d,%r9d + movq %rsp,%rax .Lmul4x_enter: andl $0x80100,%r11d cmpl $0x80100,%r11d @@ -240,23 +251,29 @@ bn_mul4x_mont: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 4(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -.Lmul4x_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + .Lmul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lmul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + movq %rax,8(%rsp,%r9,8) +.Lmul4x_body: movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -625,13 +642,13 @@ bn_mul4x_mont: movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lmul4x_epilogue: .byte 0xf3,0xc3 .size bn_mul4x_mont,.-bn_mul4x_mont @@ -641,14 +658,15 @@ bn_mul4x_mont: .type bn_sqr8x_mont,@function .align 32 bn_sqr8x_mont: -.Lsqr8x_enter: movq %rsp,%rax +.Lsqr8x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lsqr8x_prologue: movl %r9d,%r10d shll $3,%r9d @@ -661,33 +679,42 @@ bn_sqr8x_mont: leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp movq (%r8),%r8 subq %rsi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lsqr8x_sp_alt - subq %r11,%rsp - leaq -64(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp jmp .Lsqr8x_sp_done .align 32 .Lsqr8x_sp_alt: leaq 4096-64(,%r9,2),%r10 - leaq -64(%rsp,%r9,2),%rsp + leaq -64(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lsqr8x_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk + jmp .Lsqr8x_page_walk_done + +.align 16 .Lsqr8x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lsqr8x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk +.Lsqr8x_page_walk_done: movq %r9,%r10 negq %r9 @@ -800,30 +827,38 @@ bn_sqr8x_mont: .type bn_mulx4x_mont,@function .align 32 bn_mulx4x_mont: -.Lmulx4x_enter: movq %rsp,%rax +.Lmulx4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lmulx4x_prologue: shll $3,%r9d -.byte 0x67 xorq %r10,%r10 subq %r9,%r10 movq (%r8),%r8 - leaq -72(%rsp,%r10,1),%rsp - andq $-128,%rsp - movq %rax,%r11 - subq %rsp,%r11 + leaq -72(%rsp,%r10,1),%rbp + andq $-128,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + +.align 16 .Lmulx4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc .Lmulx4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: leaq (%rdx,%r9,1),%r10 diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s index e4dfd83460..416152560a 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s @@ -6,6 +6,8 @@ .type bn_mul_mont_gather5,@function .align 64 bn_mul_mont_gather5: + movl %r9d,%r9d + movq %rsp,%rax testl $7,%r9d jnz .Lmul_enter movl OPENSSL_ia32cap_P+8(%rip),%r11d @@ -13,10 +15,7 @@ bn_mul_mont_gather5: .align 16 .Lmul_enter: - movl %r9d,%r9d - movq %rsp,%rax movd 8(%rsp),%xmm5 - leaq .Linc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 @@ -24,26 +23,36 @@ bn_mul_mont_gather5: pushq %r14 pushq %r15 - leaq 2(%r9),%r11 - negq %r11 - leaq -264(%rsp,%r11,8),%rsp - andq $-1024,%rsp + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %rax,8(%rsp,%r9,8) -.Lmul_body: - subq %rsp,%rax - andq $-4096,%rax + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + .Lmul_page_walk: - movq (%rsp,%rax,1),%r11 - subq $4096,%rax -.byte 0x2e - jnc .Lmul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + leaq .Linc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +.Lmul_body: leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 @@ -414,18 +423,19 @@ bn_mul_mont_gather5: .type bn_mul4x_mont_gather5,@function .align 32 bn_mul4x_mont_gather5: +.byte 0x67 + movq %rsp,%rax .Lmul4x_enter: andl $0x80108,%r11d cmpl $0x80108,%r11d je .Lmulx4x_enter -.byte 0x67 - movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lmul4x_prologue: .byte 0x67 shll $3,%r9d @@ -442,32 +452,40 @@ bn_mul4x_mont_gather5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lmul4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lmul4xsp_done .align 32 .Lmul4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lmul4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + .Lmul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lmul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: negq %r9 @@ -1019,17 +1037,18 @@ mul4x_internal: .type bn_power5,@function .align 32 bn_power5: + movq %rsp,%rax movl OPENSSL_ia32cap_P+8(%rip),%r11d andl $0x80108,%r11d cmpl $0x80108,%r11d je .Lpowerx5_enter - movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lpower5_prologue: shll $3,%r9d leal (%r9,%r9,2),%r10d @@ -1044,32 +1063,40 @@ bn_power5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lpwr_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lpwr_sp_done .align 32 .Lpwr_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lpwr_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk + jmp .Lpwr_page_walk_done + .Lpwr_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lpwr_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk +.Lpwr_page_walk_done: movq %r9,%r10 negq %r9 @@ -1980,6 +2007,7 @@ bn_from_mont8x: pushq %r13 pushq %r14 pushq %r15 +.Lfrom_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -1994,32 +2022,40 @@ bn_from_mont8x: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lfrom_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lfrom_sp_done .align 32 .Lfrom_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lfrom_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lfrom_page_walk + jmp .Lfrom_page_walk_done + .Lfrom_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lfrom_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lfrom_page_walk +.Lfrom_page_walk_done: movq %r9,%r10 negq %r9 @@ -2113,14 +2149,15 @@ bn_from_mont8x: .type bn_mulx4x_mont_gather5,@function .align 32 bn_mulx4x_mont_gather5: -.Lmulx4x_enter: movq %rsp,%rax +.Lmulx4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lmulx4x_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -2137,31 +2174,39 @@ bn_mulx4x_mont_gather5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lmulx4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lmulx4xsp_done .Lmulx4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lmulx4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + .Lmulx4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lmulx4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: @@ -2619,14 +2664,15 @@ mulx4x_internal: .type bn_powerx5,@function .align 32 bn_powerx5: -.Lpowerx5_enter: movq %rsp,%rax +.Lpowerx5_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lpowerx5_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -2641,32 +2687,40 @@ bn_powerx5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lpwrx_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lpwrx_sp_done .align 32 .Lpwrx_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lpwrx_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk + jmp .Lpwrx_page_walk_done + .Lpwrx_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lpwrx_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk +.Lpwrx_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s index 6d1be614f3..23188cda6e 100644 --- a/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s @@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2: pushq %r13 movq 0(%rsi),%r8 + xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 @@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2: adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx - sbbq %r13,%r13 + adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx @@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 - cmovzq %rcx,%r10 - cmovzq %r12,%r11 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 @@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -217,14 +218,14 @@ ecp_nistz256_add: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -1461,13 +1462,14 @@ ecp_nistz256_avx2_select_w7: .type __ecp_nistz256_add_toq,@function .align 32 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1550,13 +1552,14 @@ __ecp_nistz256_subq: .type __ecp_nistz256_mul_by_2q,@function .align 32 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1809,16 +1812,14 @@ ecp_nistz256_point_add: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -1830,14 +1831,14 @@ ecp_nistz256_point_add: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) @@ -1848,8 +1849,8 @@ ecp_nistz256_point_add: call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -2032,6 +2033,7 @@ ecp_nistz256_point_add: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -2039,7 +2041,7 @@ ecp_nistz256_point_add: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2047,15 +2049,15 @@ ecp_nistz256_point_add: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2213,16 +2215,14 @@ ecp_nistz256_point_add_affine: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -2340,6 +2340,7 @@ ecp_nistz256_point_add_affine: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -2347,7 +2348,7 @@ ecp_nistz256_point_add_affine: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2355,15 +2356,15 @@ ecp_nistz256_point_add_affine: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2842,16 +2843,14 @@ ecp_nistz256_point_addx: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -2863,14 +2862,14 @@ ecp_nistz256_point_addx: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-128(%rsi),%rsi movq %rdx,544+0(%rsp) @@ -2881,8 +2880,8 @@ ecp_nistz256_point_addx: call __ecp_nistz256_sqr_montx pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -3065,6 +3064,7 @@ ecp_nistz256_point_addx: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -3072,7 +3072,7 @@ ecp_nistz256_point_addx: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3080,15 +3080,15 @@ ecp_nistz256_point_addx: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx @@ -3242,16 +3242,14 @@ ecp_nistz256_point_add_affinex: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -3369,6 +3367,7 @@ ecp_nistz256_point_add_affinex: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -3376,7 +3375,7 @@ ecp_nistz256_point_add_affinex: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3384,15 +3383,15 @@ ecp_nistz256_point_add_affinex: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s index 22a031f368..195a148bb9 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s @@ -1263,9 +1263,9 @@ _shaext_shortcut: .align 16 .Loop_shaext: decq %rdx - leaq 64(%rsi),%rax + leaq 64(%rsi),%r8 paddd %xmm4,%xmm1 - cmovneq %rax,%rsi + cmovneq %r8,%rsi movdqa %xmm0,%xmm8 .byte 15,56,201,229 movdqa %xmm0,%xmm2 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s index bca9fbda7f..1d5e63892f 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s @@ -6,6 +6,8 @@ .p2align 4 _bn_mul_mont: + movl %r9d,%r9d + movq %rsp,%rax testl $3,%r9d jnz L$mul_enter cmpl $8,%r9d @@ -26,29 +28,36 @@ L$mul_enter: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 2(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -L$mul_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done + +.p2align 4 L$mul_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc L$mul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + movq %rax,8(%rsp,%r9,8) +L$mul_body: movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -216,19 +225,21 @@ L$copy: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$mul_epilogue: .byte 0xf3,0xc3 .p2align 4 bn_mul4x_mont: + movl %r9d,%r9d + movq %rsp,%rax L$mul4x_enter: andl $0x80100,%r11d cmpl $0x80100,%r11d @@ -240,23 +251,29 @@ L$mul4x_enter: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 4(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -L$mul4x_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + L$mul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$mul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: + movq %rax,8(%rsp,%r9,8) +L$mul4x_body: movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -625,13 +642,13 @@ L$copy4x: movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$mul4x_epilogue: .byte 0xf3,0xc3 @@ -641,14 +658,15 @@ L$mul4x_epilogue: .p2align 5 bn_sqr8x_mont: -L$sqr8x_enter: movq %rsp,%rax +L$sqr8x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$sqr8x_prologue: movl %r9d,%r10d shll $3,%r9d @@ -661,33 +679,42 @@ L$sqr8x_enter: leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp movq (%r8),%r8 subq %rsi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$sqr8x_sp_alt - subq %r11,%rsp - leaq -64(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp jmp L$sqr8x_sp_done .p2align 5 L$sqr8x_sp_alt: leaq 4096-64(,%r9,2),%r10 - leaq -64(%rsp,%r9,2),%rsp + leaq -64(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$sqr8x_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk + jmp L$sqr8x_page_walk_done + +.p2align 4 L$sqr8x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$sqr8x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk +L$sqr8x_page_walk_done: movq %r9,%r10 negq %r9 @@ -800,30 +827,38 @@ L$sqr8x_epilogue: .p2align 5 bn_mulx4x_mont: -L$mulx4x_enter: movq %rsp,%rax +L$mulx4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$mulx4x_prologue: shll $3,%r9d -.byte 0x67 xorq %r10,%r10 subq %r9,%r10 movq (%r8),%r8 - leaq -72(%rsp,%r10,1),%rsp - andq $-128,%rsp - movq %rax,%r11 - subq %rsp,%r11 + leaq -72(%rsp,%r10,1),%rbp + andq $-128,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk + jmp L$mulx4x_page_walk_done + +.p2align 4 L$mulx4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc L$mulx4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk +L$mulx4x_page_walk_done: leaq (%rdx,%r9,1),%r10 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s index 5d987a1229..878c31ffac 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s @@ -6,6 +6,8 @@ .p2align 6 _bn_mul_mont_gather5: + movl %r9d,%r9d + movq %rsp,%rax testl $7,%r9d jnz L$mul_enter movl _OPENSSL_ia32cap_P+8(%rip),%r11d @@ -13,10 +15,7 @@ _bn_mul_mont_gather5: .p2align 4 L$mul_enter: - movl %r9d,%r9d - movq %rsp,%rax movd 8(%rsp),%xmm5 - leaq L$inc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 @@ -24,26 +23,36 @@ L$mul_enter: pushq %r14 pushq %r15 - leaq 2(%r9),%r11 - negq %r11 - leaq -264(%rsp,%r11,8),%rsp - andq $-1024,%rsp + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %rax,8(%rsp,%r9,8) -L$mul_body: - subq %rsp,%rax - andq $-4096,%rax + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done + L$mul_page_walk: - movq (%rsp,%rax,1),%r11 - subq $4096,%rax -.byte 0x2e - jnc L$mul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + + leaq L$inc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +L$mul_body: leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 @@ -414,18 +423,19 @@ L$mul_epilogue: .p2align 5 bn_mul4x_mont_gather5: +.byte 0x67 + movq %rsp,%rax L$mul4x_enter: andl $0x80108,%r11d cmpl $0x80108,%r11d je L$mulx4x_enter -.byte 0x67 - movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$mul4x_prologue: .byte 0x67 shll $3,%r9d @@ -442,32 +452,40 @@ L$mul4x_enter: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$mul4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$mul4xsp_done .p2align 5 L$mul4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$mul4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + L$mul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$mul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: negq %r9 @@ -1019,17 +1037,18 @@ L$inner4x: .p2align 5 _bn_power5: + movq %rsp,%rax movl _OPENSSL_ia32cap_P+8(%rip),%r11d andl $0x80108,%r11d cmpl $0x80108,%r11d je L$powerx5_enter - movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$power5_prologue: shll $3,%r9d leal (%r9,%r9,2),%r10d @@ -1044,32 +1063,40 @@ _bn_power5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$pwr_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$pwr_sp_done .p2align 5 L$pwr_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$pwr_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk + jmp L$pwr_page_walk_done + L$pwr_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$pwr_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk +L$pwr_page_walk_done: movq %r9,%r10 negq %r9 @@ -1980,6 +2007,7 @@ bn_from_mont8x: pushq %r13 pushq %r14 pushq %r15 +L$from_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -1994,32 +2022,40 @@ bn_from_mont8x: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$from_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$from_sp_done .p2align 5 L$from_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$from_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$from_page_walk + jmp L$from_page_walk_done + L$from_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$from_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$from_page_walk +L$from_page_walk_done: movq %r9,%r10 negq %r9 @@ -2113,14 +2149,15 @@ L$from_epilogue: .p2align 5 bn_mulx4x_mont_gather5: -L$mulx4x_enter: movq %rsp,%rax +L$mulx4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$mulx4x_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -2137,31 +2174,39 @@ L$mulx4x_enter: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$mulx4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$mulx4xsp_done L$mulx4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$mulx4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk + jmp L$mulx4x_page_walk_done + L$mulx4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$mulx4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mulx4x_page_walk +L$mulx4x_page_walk_done: @@ -2619,14 +2664,15 @@ L$mulx4x_inner: .p2align 5 bn_powerx5: -L$powerx5_enter: movq %rsp,%rax +L$powerx5_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$powerx5_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -2641,32 +2687,40 @@ L$powerx5_enter: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$pwrx_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$pwrx_sp_done .p2align 5 L$pwrx_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$pwrx_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwrx_page_walk + jmp L$pwrx_page_walk_done + L$pwrx_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$pwrx_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwrx_page_walk +L$pwrx_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s index b90788f453..05b6d5be2c 100644 --- a/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s @@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2: pushq %r13 movq 0(%rsi),%r8 + xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 @@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2: adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx - sbbq %r13,%r13 + adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx @@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq L$poly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 - cmovzq %rcx,%r10 - cmovzq %r12,%r11 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 @@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq L$poly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -217,14 +218,14 @@ _ecp_nistz256_add: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -1461,13 +1462,14 @@ L$select_loop_avx2_w7: .p2align 5 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1550,13 +1552,14 @@ __ecp_nistz256_subq: .p2align 5 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1809,16 +1812,14 @@ _ecp_nistz256_point_add: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -1830,14 +1831,14 @@ _ecp_nistz256_point_add: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) @@ -1848,8 +1849,8 @@ _ecp_nistz256_point_add: call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -2032,6 +2033,7 @@ L$add_proceedq: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -2039,7 +2041,7 @@ L$add_proceedq: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2047,15 +2049,15 @@ L$add_proceedq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2213,16 +2215,14 @@ _ecp_nistz256_point_add_affine: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -2340,6 +2340,7 @@ _ecp_nistz256_point_add_affine: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -2347,7 +2348,7 @@ _ecp_nistz256_point_add_affine: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2355,15 +2356,15 @@ _ecp_nistz256_point_add_affine: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2842,16 +2843,14 @@ L$point_addx: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -2863,14 +2862,14 @@ L$point_addx: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-128(%rsi),%rsi movq %rdx,544+0(%rsp) @@ -2881,8 +2880,8 @@ L$point_addx: call __ecp_nistz256_sqr_montx pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -3065,6 +3064,7 @@ L$add_proceedx: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -3072,7 +3072,7 @@ L$add_proceedx: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3080,15 +3080,15 @@ L$add_proceedx: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx @@ -3242,16 +3242,14 @@ L$point_add_affinex: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -3369,6 +3367,7 @@ L$point_add_affinex: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -3376,7 +3375,7 @@ L$point_add_affinex: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3384,15 +3383,15 @@ L$point_add_affinex: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s index c89ffe3df6..1c52e05e39 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s @@ -1263,9 +1263,9 @@ _shaext_shortcut: .p2align 4 L$oop_shaext: decq %rdx - leaq 64(%rsi),%rax + leaq 64(%rsi),%r8 paddd %xmm4,%xmm1 - cmovneq %rax,%rsi + cmovneq %r8,%rsi movdqa %xmm0,%xmm8 .byte 15,56,201,229 movdqa %xmm0,%xmm2 diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm index ccc591b7c9..9693ed97e4 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm @@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp test r9d,3 jnz $L$mul_enter cmp r9d,8 @@ -39,29 +41,36 @@ $L$mul_enter:: push r14 push r15 - mov r9d,r9d - lea r10,QWORD PTR[2+r9] + neg r9 mov r11,rsp - neg r10 - lea rsp,QWORD PTR[r10*8+rsp] - and rsp,-1024 + lea r10,QWORD PTR[((-16))+r9*8+rsp] + neg r9 + and r10,-1024 - mov QWORD PTR[8+r9*8+rsp],r11 -$L$mul_body:: - sub r11,rsp + sub r11,r10 and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk + jmp $L$mul_page_walk_done + +ALIGN 16 $L$mul_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 066h,02eh - jnc $L$mul_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk +$L$mul_page_walk_done:: + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: mov r12,rdx mov r8,QWORD PTR[r8] mov rbx,QWORD PTR[r12] @@ -229,13 +238,13 @@ $L$copy:: mov rsi,QWORD PTR[8+r9*8+rsp] mov rax,1 - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$mul_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -257,6 +266,8 @@ $L$SEH_begin_bn_mul4x_mont:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp $L$mul4x_enter:: and r11d,080100h cmp r11d,080100h @@ -268,23 +279,29 @@ $L$mul4x_enter:: push r14 push r15 - mov r9d,r9d - lea r10,QWORD PTR[4+r9] + neg r9 mov r11,rsp - neg r10 - lea rsp,QWORD PTR[r10*8+rsp] - and rsp,-1024 + lea r10,QWORD PTR[((-32))+r9*8+rsp] + neg r9 + and r10,-1024 - mov QWORD PTR[8+r9*8+rsp],r11 -$L$mul4x_body:: - sub r11,rsp + sub r11,r10 and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul4x_page_walk + jmp $L$mul4x_page_walk_done + $L$mul4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$mul4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul4x_page_walk +$L$mul4x_page_walk_done:: + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul4x_body:: mov QWORD PTR[16+r9*8+rsp],rdi mov r12,rdx mov r8,QWORD PTR[r8] @@ -653,13 +670,13 @@ $L$copy4x:: movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 mov rsi,QWORD PTR[8+r9*8+rsp] mov rax,1 - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$mul4x_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -684,14 +701,15 @@ $L$SEH_begin_bn_sqr8x_mont:: mov r9,QWORD PTR[48+rsp] -$L$sqr8x_enter:: mov rax,rsp +$L$sqr8x_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$sqr8x_prologue:: mov r10d,r9d shl r9d,3 @@ -704,33 +722,42 @@ $L$sqr8x_enter:: lea r11,QWORD PTR[((-64))+r9*2+rsp] + mov rbp,rsp mov r8,QWORD PTR[r8] sub r11,rsi and r11,4095 cmp r10,r11 jb $L$sqr8x_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-64))+r9*2+rbp] jmp $L$sqr8x_sp_done ALIGN 32 $L$sqr8x_sp_alt:: lea r10,QWORD PTR[((4096-64))+r9*2] - lea rsp,QWORD PTR[((-64))+r9*2+rsp] + lea rbp,QWORD PTR[((-64))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$sqr8x_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$sqr8x_page_walk + jmp $L$sqr8x_page_walk_done + +ALIGN 16 $L$sqr8x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$sqr8x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$sqr8x_page_walk +$L$sqr8x_page_walk_done:: mov r10,r9 neg r9 @@ -858,30 +885,38 @@ $L$SEH_begin_bn_mulx4x_mont:: mov r9,QWORD PTR[48+rsp] -$L$mulx4x_enter:: mov rax,rsp +$L$mulx4x_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$mulx4x_prologue:: shl r9d,3 -DB 067h xor r10,r10 sub r10,r9 mov r8,QWORD PTR[r8] - lea rsp,QWORD PTR[((-72))+r10*1+rsp] - and rsp,-128 - mov r11,rax - sub r11,rsp + lea rbp,QWORD PTR[((-72))+r10*1+rsp] + and rbp,-128 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mulx4x_page_walk + jmp $L$mulx4x_page_walk_done + +ALIGN 16 $L$mulx4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 066h,02eh - jnc $L$mulx4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mulx4x_page_walk +$L$mulx4x_page_walk_done:: lea r10,QWORD PTR[r9*1+rdx] @@ -1230,22 +1265,8 @@ mul_handler PROC PRIVATE mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] - lea rax,QWORD PTR[48+rax] - - mov rbx,QWORD PTR[((-8))+rax] - mov rbp,QWORD PTR[((-16))+rax] - mov r12,QWORD PTR[((-24))+rax] - mov r13,QWORD PTR[((-32))+rax] - mov r14,QWORD PTR[((-40))+rax] - mov r15,QWORD PTR[((-48))+rax] - mov QWORD PTR[144+r8],rbx - mov QWORD PTR[160+r8],rbp - mov QWORD PTR[216+r8],r12 - mov QWORD PTR[224+r8],r13 - mov QWORD PTR[232+r8],r14 - mov QWORD PTR[240+r8],r15 - jmp $L$common_seh_tail + jmp $L$common_pop_regs mul_handler ENDP @@ -1273,15 +1294,21 @@ sqr_handler PROC PRIVATE cmp rbx,r10 jb $L$common_seh_tail + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_pop_regs + mov rax,QWORD PTR[152+r8] - mov r10d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail mov rax,QWORD PTR[40+rax] +$L$common_pop_regs:: mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] @@ -1366,11 +1393,13 @@ DB 9,0,0,0 $L$SEH_info_bn_sqr8x_mont:: DB 9,0,0,0 DD imagerel sqr_handler - DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue + DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue +ALIGN 8 $L$SEH_info_bn_mulx4x_mont:: DB 9,0,0,0 DD imagerel sqr_handler - DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue + DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue +ALIGN 8 .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm index 3c1a74afb9..6cd4f2d3d8 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm @@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont_gather5:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp test r9d,7 jnz $L$mul_enter mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] @@ -26,10 +28,7 @@ $L$SEH_begin_bn_mul_mont_gather5:: ALIGN 16 $L$mul_enter:: - mov r9d,r9d - mov rax,rsp movd xmm5,DWORD PTR[56+rsp] - lea r10,QWORD PTR[$L$inc] push rbx push rbp push r12 @@ -37,26 +36,36 @@ $L$mul_enter:: push r14 push r15 - lea r11,QWORD PTR[2+r9] - neg r11 - lea rsp,QWORD PTR[((-264))+r11*8+rsp] - and rsp,-1024 + neg r9 + mov r11,rsp + lea r10,QWORD PTR[((-280))+r9*8+rsp] + neg r9 + and r10,-1024 + - mov QWORD PTR[8+r9*8+rsp],rax -$L$mul_body:: + sub r11,r10 + and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk + jmp $L$mul_page_walk_done - sub rax,rsp - and rax,-4096 $L$mul_page_walk:: - mov r11,QWORD PTR[rax*1+rsp] - sub rax,4096 -DB 02eh - jnc $L$mul_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk +$L$mul_page_walk_done:: + + lea r10,QWORD PTR[$L$inc] + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: lea r12,QWORD PTR[128+rdx] movdqa xmm0,XMMWORD PTR[r10] @@ -442,18 +451,19 @@ $L$SEH_begin_bn_mul4x_mont_gather5:: mov r9,QWORD PTR[48+rsp] +DB 067h + mov rax,rsp $L$mul4x_enter:: and r11d,080108h cmp r11d,080108h je $L$mulx4x_enter -DB 067h - mov rax,rsp push rbx push rbp push r12 push r13 push r14 push r15 +$L$mul4x_prologue:: DB 067h shl r9d,3 @@ -470,32 +480,40 @@ DB 067h lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$mul4xsp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$mul4xsp_done ALIGN 32 $L$mul4xsp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$mul4xsp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mul4x_page_walk + jmp $L$mul4x_page_walk_done + $L$mul4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$mul4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mul4x_page_walk +$L$mul4x_page_walk_done:: neg r9 @@ -1062,17 +1080,18 @@ $L$SEH_begin_bn_power5:: mov r9,QWORD PTR[48+rsp] + mov rax,rsp mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] and r11d,080108h cmp r11d,080108h je $L$powerx5_enter - mov rax,rsp push rbx push rbp push r12 push r13 push r14 push r15 +$L$power5_prologue:: shl r9d,3 lea r10d,DWORD PTR[r9*2+r9] @@ -1087,32 +1106,40 @@ $L$SEH_begin_bn_power5:: lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$pwr_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$pwr_sp_done ALIGN 32 $L$pwr_sp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$pwr_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwr_page_walk + jmp $L$pwr_page_walk_done + $L$pwr_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$pwr_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwr_page_walk +$L$pwr_page_walk_done:: mov r10,r9 neg r9 @@ -2038,6 +2065,7 @@ DB 067h push r13 push r14 push r15 +$L$from_prologue:: shl r9d,3 lea r10,QWORD PTR[r9*2+r9] @@ -2052,32 +2080,40 @@ DB 067h lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$from_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$from_sp_done ALIGN 32 $L$from_sp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$from_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$from_page_walk + jmp $L$from_page_walk_done + $L$from_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$from_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$from_page_walk +$L$from_page_walk_done:: mov r10,r9 neg r9 @@ -2186,14 +2222,15 @@ $L$SEH_begin_bn_mulx4x_mont_gather5:: mov r9,QWORD PTR[48+rsp] -$L$mulx4x_enter:: mov rax,rsp +$L$mulx4x_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$mulx4x_prologue:: shl r9d,3 lea r10,QWORD PTR[r9*2+r9] @@ -2210,31 +2247,39 @@ $L$mulx4x_enter:: lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$mulx4xsp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$mulx4xsp_done $L$mulx4xsp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$mulx4xsp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mulx4x_page_walk + jmp $L$mulx4x_page_walk_done + $L$mulx4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$mulx4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mulx4x_page_walk +$L$mulx4x_page_walk_done:: @@ -2707,14 +2752,15 @@ $L$SEH_begin_bn_powerx5:: mov r9,QWORD PTR[48+rsp] -$L$powerx5_enter:: mov rax,rsp +$L$powerx5_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$powerx5_prologue:: shl r9d,3 lea r10,QWORD PTR[r9*2+r9] @@ -2729,32 +2775,40 @@ $L$powerx5_enter:: lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$pwrx_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$pwrx_sp_done ALIGN 32 $L$pwrx_sp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$pwrx_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwrx_page_walk + jmp $L$pwrx_page_walk_done + $L$pwrx_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$pwrx_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwrx_page_walk +$L$pwrx_page_walk_done:: mov r10,r9 neg r9 @@ -3712,9 +3766,14 @@ mul_handler PROC PRIVATE cmp rbx,r10 jb $L$common_seh_tail + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_pop_regs + mov rax,QWORD PTR[152+r8] - mov r10d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail @@ -3726,11 +3785,11 @@ mul_handler PROC PRIVATE mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] - jmp $L$body_proceed + jmp $L$common_pop_regs $L$body_40:: mov rax,QWORD PTR[40+rax] -$L$body_proceed:: +$L$common_pop_regs:: mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] @@ -3819,32 +3878,32 @@ ALIGN 8 $L$SEH_info_bn_mul_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul_body,imagerel $L$mul_epilogue + DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue ALIGN 8 $L$SEH_info_bn_mul4x_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue + DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue ALIGN 8 $L$SEH_info_bn_power5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$power5_body,imagerel $L$power5_epilogue + DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue ALIGN 8 $L$SEH_info_bn_from_mont8x:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$from_body,imagerel $L$from_epilogue + DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue ALIGN 8 $L$SEH_info_bn_mulx4x_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue + DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue ALIGN 8 $L$SEH_info_bn_powerx5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue + DD imagerel $L$powerx5_prologue,imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue ALIGN 8 $L$SEH_info_bn_gather5:: DB 001h,00bh,003h,00ah diff --git a/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm index f38d253c16..7cf9277bfb 100644 --- a/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm @@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: push r13 mov r8,QWORD PTR[rsi] + xor r13,r13 mov r9,QWORD PTR[8+rsi] add r8,r8 mov r10,QWORD PTR[16+rsi] @@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: adc r10,r10 adc r11,r11 mov rdx,r9 - sbb r13,r13 + adc r13,0 sub r8,QWORD PTR[rsi] mov rcx,r10 @@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: sbb r10,QWORD PTR[16+rsi] mov r12,r11 sbb r11,QWORD PTR[24+rsi] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:: sbb r10,0 mov r12,r11 sbb r11,QWORD PTR[(($L$poly+24))] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx - cmovz r10,rcx - cmovz r11,r12 + cmovc r8,rax + cmovc r9,rdx + cmovc r10,rcx + cmovc r11,r12 xor r13,r13 add r8,QWORD PTR[rsi] @@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:: sbb r10,0 mov r12,r11 sbb r11,QWORD PTR[(($L$poly+24))] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add:: sbb r10,QWORD PTR[16+rsi] mov r12,r11 sbb r11,QWORD PTR[24+rsi] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -1673,13 +1674,14 @@ ecp_nistz256_avx2_select_w7 ENDP ALIGN 32 __ecp_nistz256_add_toq PROC PRIVATE + xor r11,r11 add r12,QWORD PTR[rbx] adc r13,QWORD PTR[8+rbx] mov rax,r12 adc r8,QWORD PTR[16+rbx] adc r9,QWORD PTR[24+rbx] mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -1687,14 +1689,14 @@ __ecp_nistz256_add_toq PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax - cmovz r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovz r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovz r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -1762,13 +1764,14 @@ __ecp_nistz256_subq ENDP ALIGN 32 __ecp_nistz256_mul_by_2q PROC PRIVATE + xor r11,r11 add r12,r12 adc r13,r13 mov rax,r12 adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -1776,14 +1779,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax - cmovz r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovz r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovz r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -2041,16 +2044,14 @@ $L$SEH_begin_ecp_nistz256_point_add:: mov rsi,rdx movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[(384+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[(416+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[(448+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rsi] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm2,XMMWORD PTR[32+rsi] por xmm5,xmm3 @@ -2062,14 +2063,14 @@ $L$SEH_begin_ecp_nistz256_point_add:: movdqa XMMWORD PTR[480+rsp],xmm0 pshufd xmm4,xmm5,01eh movdqa XMMWORD PTR[(480+16)+rsp],xmm1 - por xmm1,xmm0 -DB 102,72,15,110,199 + movdqu xmm0,XMMWORD PTR[64+rsi] + movdqu xmm1,XMMWORD PTR[80+rsi] movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[(512+16)+rsp],xmm3 - por xmm3,xmm2 por xmm5,xmm4 pxor xmm4,xmm4 - por xmm3,xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 lea rsi,QWORD PTR[((64-0))+rsi] mov QWORD PTR[((544+0))+rsp],rax @@ -2080,8 +2081,8 @@ DB 102,72,15,110,199 call __ecp_nistz256_sqr_montq pcmpeqd xmm5,xmm4 - pshufd xmm4,xmm3,1h - por xmm4,xmm3 + pshufd xmm4,xmm1,1h + por xmm4,xmm1 pshufd xmm5,xmm5,0 pshufd xmm3,xmm4,01eh por xmm4,xmm3 @@ -2264,6 +2265,7 @@ $L$add_proceedq:: + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[96+rsp] adc r13,r13 @@ -2271,7 +2273,7 @@ $L$add_proceedq:: adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -2279,15 +2281,15 @@ $L$add_proceedq:: sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subq @@ -2457,16 +2459,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine:: mov r8,QWORD PTR[((64+24))+rsi] movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[(320+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[(352+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[(384+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rbx] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm2,XMMWORD PTR[32+rbx] por xmm5,xmm3 @@ -2584,6 +2584,7 @@ DB 102,72,15,110,199 + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[192+rsp] adc r13,r13 @@ -2591,7 +2592,7 @@ DB 102,72,15,110,199 adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -2599,15 +2600,15 @@ DB 102,72,15,110,199 sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subq @@ -2757,14 +2758,14 @@ __ecp_nistz256_add_tox PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 + sbb r11,0 - bt r11,0 - cmovnc r12,rax - cmovnc r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovnc r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovnc r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -2852,14 +2853,14 @@ __ecp_nistz256_mul_by_2x PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 + sbb r11,0 - bt r11,0 - cmovnc r12,rax - cmovnc r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovnc r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovnc r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -3109,16 +3110,14 @@ $L$point_addx:: mov rsi,rdx movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[(384+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[(416+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[(448+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rsi] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm2,XMMWORD PTR[32+rsi] por xmm5,xmm3 @@ -3130,14 +3129,14 @@ $L$point_addx:: movdqa XMMWORD PTR[480+rsp],xmm0 pshufd xmm4,xmm5,01eh movdqa XMMWORD PTR[(480+16)+rsp],xmm1 - por xmm1,xmm0 -DB 102,72,15,110,199 + movdqu xmm0,XMMWORD PTR[64+rsi] + movdqu xmm1,XMMWORD PTR[80+rsi] movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[(512+16)+rsp],xmm3 - por xmm3,xmm2 por xmm5,xmm4 pxor xmm4,xmm4 - por xmm3,xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 lea rsi,QWORD PTR[((64-128))+rsi] mov QWORD PTR[((544+0))+rsp],rdx @@ -3148,8 +3147,8 @@ DB 102,72,15,110,199 call __ecp_nistz256_sqr_montx pcmpeqd xmm5,xmm4 - pshufd xmm4,xmm3,1h - por xmm4,xmm3 + pshufd xmm4,xmm1,1h + por xmm4,xmm1 pshufd xmm5,xmm5,0 pshufd xmm3,xmm4,01eh por xmm4,xmm3 @@ -3332,6 +3331,7 @@ $L$add_proceedx:: + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[96+rsp] adc r13,r13 @@ -3339,7 +3339,7 @@ $L$add_proceedx:: adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -3347,15 +3347,15 @@ $L$add_proceedx:: sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subx @@ -3521,16 +3521,14 @@ $L$point_add_affinex:: mov r8,QWORD PTR[((64+24))+rsi] movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[(320+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[(352+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[(384+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rbx] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm2,XMMWORD PTR[32+rbx] por xmm5,xmm3 @@ -3648,6 +3646,7 @@ DB 102,72,15,110,199 + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[192+rsp] adc r13,r13 @@ -3655,7 +3654,7 @@ DB 102,72,15,110,199 adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -3663,15 +3662,15 @@ DB 102,72,15,110,199 sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subx diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm index 295a2c06ba..24df468123 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm @@ -1291,9 +1291,9 @@ DB 102,15,56,0,251 ALIGN 16 $L$oop_shaext:: dec rdx - lea rax,QWORD PTR[64+rsi] + lea r8,QWORD PTR[64+rsi] paddd xmm1,xmm4 - cmovne rsi,rax + cmovne rsi,r8 movdqa xmm8,xmm0 DB 15,56,201,229 movdqa xmm2,xmm0 diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s index 2f7211d92e..b683577231 100644 --- a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s @@ -15,44 +15,51 @@ bn_mul_mont: jl .L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx - movl %esp,%ebp addl $2,%edi negl %edi - leal -32(%esp,%edi,4),%esp + leal -32(%esp,%edi,4),%ebp negl %edi - movl %esp,%eax + movl %ebp,%eax subl %edx,%eax andl $2047,%eax - subl %eax,%esp - xorl %esp,%edx + subl %eax,%ebp + xorl %ebp,%edx andl $2048,%edx xorl $2048,%edx - subl %edx,%esp - andl $-64,%esp - movl %ebp,%eax - subl %esp,%eax + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk + jmp .L002page_walk_done +.align 16 .L001page_walk: - movl (%esp,%eax,1),%edx - subl $4096,%eax -.byte 46 - jnc .L001page_walk + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk +.L002page_walk_done: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx - movl 12(%esi),%edx + movl 12(%esi),%ebp movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) - movl %edx,16(%esp) + movl %ebp,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx - movl %ebp,24(%esp) + movl %edx,24(%esp) leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) - jnc .L002non_sse2 + jnc .L003non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi @@ -76,7 +83,7 @@ bn_mul_mont: psrlq $32,%mm3 incl %ecx .align 16 -.L0031st: +.L0041st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -91,7 +98,7 @@ bn_mul_mont: psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx - jl .L0031st + jl .L0041st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -105,7 +112,7 @@ bn_mul_mont: paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx -.L004outer: +.L005outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 @@ -127,7 +134,7 @@ bn_mul_mont: paddq %mm6,%mm2 incl %ecx decl %ebx -.L005inner: +.L006inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -144,7 +151,7 @@ bn_mul_mont: paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx - jnz .L005inner + jnz .L006inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 @@ -162,11 +169,11 @@ bn_mul_mont: movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx - jle .L004outer + jle .L005outer emms - jmp .L006common_tail + jmp .L007common_tail .align 16 -.L002non_sse2: +.L003non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -177,12 +184,12 @@ bn_mul_mont: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz .L007bn_sqr_mont + jz .L008bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 -.L008mull: +.L009mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -191,7 +198,7 @@ bn_mul_mont: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L008mull + jl .L009mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -209,9 +216,9 @@ bn_mul_mont: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp .L0092ndmadd + jmp .L0102ndmadd .align 16 -.L0101stmadd: +.L0111stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -222,7 +229,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L0101stmadd + jl .L0111stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -245,7 +252,7 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx .align 16 -.L0092ndmadd: +.L0102ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -256,7 +263,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0092ndmadd + jl .L0102ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -272,16 +279,16 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je .L006common_tail + je .L007common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp .L0101stmadd + jmp .L0111stmadd .align 16 -.L007bn_sqr_mont: +.L008bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -292,7 +299,7 @@ bn_mul_mont: andl $1,%ebx incl %ecx .align 16 -.L011sqr: +.L012sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -304,7 +311,7 @@ bn_mul_mont: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl .L011sqr + jl .L012sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -328,7 +335,7 @@ bn_mul_mont: movl 4(%esi),%eax movl $1,%ecx .align 16 -.L0123rdmadd: +.L0133rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -347,7 +354,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0123rdmadd + jl .L0133rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -363,7 +370,7 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je .L006common_tail + je .L007common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -375,12 +382,12 @@ bn_mul_mont: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je .L013sqrlast + je .L014sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 -.L014sqradd: +.L015sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -396,13 +403,13 @@ bn_mul_mont: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle .L014sqradd + jle .L015sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -.L013sqrlast: +.L014sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -417,9 +424,9 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp .L0123rdmadd + jmp .L0133rdmadd .align 16 -.L006common_tail: +.L007common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -427,13 +434,13 @@ bn_mul_mont: movl %ebx,%ecx xorl %edx,%edx .align 16 -.L015sub: +.L016sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge .L015sub + jge .L016sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -441,12 +448,12 @@ bn_mul_mont: andl %eax,%ebp orl %ebp,%esi .align 16 -.L016copy: +.L017copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge .L016copy + jge .L017copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s index accec0e519..7bc58d24e0 100644 --- a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s @@ -14,47 +14,54 @@ L_bn_mul_mont_begin: jl L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx - movl %esp,%ebp addl $2,%edi negl %edi - leal -32(%esp,%edi,4),%esp + leal -32(%esp,%edi,4),%ebp negl %edi - movl %esp,%eax + movl %ebp,%eax subl %edx,%eax andl $2047,%eax - subl %eax,%esp - xorl %esp,%edx + subl %eax,%ebp + xorl %ebp,%edx andl $2048,%edx xorl $2048,%edx - subl %edx,%esp - andl $-64,%esp - movl %ebp,%eax - subl %esp,%eax + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk + jmp L002page_walk_done +.align 4,0x90 L001page_walk: - movl (%esp,%eax,1),%edx - subl $4096,%eax -.byte 46 - jnc L001page_walk + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk +L002page_walk_done: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx - movl 12(%esi),%edx + movl 12(%esi),%ebp movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) - movl %edx,16(%esp) + movl %ebp,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx - movl %ebp,24(%esp) - call L002PIC_me_up -L002PIC_me_up: + movl %edx,24(%esp) + call L003PIC_me_up +L003PIC_me_up: popl %eax - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax btl $26,(%eax) - jnc L003non_sse2 + jnc L004non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi @@ -78,7 +85,7 @@ L002PIC_me_up: psrlq $32,%mm3 incl %ecx .align 4,0x90 -L0041st: +L0051st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -93,7 +100,7 @@ L0041st: psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx - jl L0041st + jl L0051st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -107,7 +114,7 @@ L0041st: paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx -L005outer: +L006outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 @@ -129,7 +136,7 @@ L005outer: paddq %mm6,%mm2 incl %ecx decl %ebx -L006inner: +L007inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -146,7 +153,7 @@ L006inner: paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx - jnz L006inner + jnz L007inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 @@ -164,11 +171,11 @@ L006inner: movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx - jle L005outer + jle L006outer emms - jmp L007common_tail + jmp L008common_tail .align 4,0x90 -L003non_sse2: +L004non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -179,12 +186,12 @@ L003non_sse2: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz L008bn_sqr_mont + jz L009bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 4,0x90 -L009mull: +L010mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -193,7 +200,7 @@ L009mull: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L009mull + jl L010mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -211,9 +218,9 @@ L009mull: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp L0102ndmadd + jmp L0112ndmadd .align 4,0x90 -L0111stmadd: +L0121stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -224,7 +231,7 @@ L0111stmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L0111stmadd + jl L0121stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -247,7 +254,7 @@ L0111stmadd: adcl $0,%edx movl $1,%ecx .align 4,0x90 -L0102ndmadd: +L0112ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -258,7 +265,7 @@ L0102ndmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0102ndmadd + jl L0112ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -274,16 +281,16 @@ L0102ndmadd: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je L007common_tail + je L008common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp L0111stmadd + jmp L0121stmadd .align 4,0x90 -L008bn_sqr_mont: +L009bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -294,7 +301,7 @@ L008bn_sqr_mont: andl $1,%ebx incl %ecx .align 4,0x90 -L012sqr: +L013sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -306,7 +313,7 @@ L012sqr: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl L012sqr + jl L013sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -330,7 +337,7 @@ L012sqr: movl 4(%esi),%eax movl $1,%ecx .align 4,0x90 -L0133rdmadd: +L0143rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -349,7 +356,7 @@ L0133rdmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0133rdmadd + jl L0143rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -365,7 +372,7 @@ L0133rdmadd: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je L007common_tail + je L008common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -377,12 +384,12 @@ L0133rdmadd: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je L014sqrlast + je L015sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 4,0x90 -L015sqradd: +L016sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -398,13 +405,13 @@ L015sqradd: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle L015sqradd + jle L016sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -L014sqrlast: +L015sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -419,9 +426,9 @@ L014sqrlast: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp L0133rdmadd + jmp L0143rdmadd .align 4,0x90 -L007common_tail: +L008common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -429,13 +436,13 @@ L007common_tail: movl %ebx,%ecx xorl %edx,%edx .align 4,0x90 -L016sub: +L017sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge L016sub + jge L017sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -443,12 +450,12 @@ L016sub: andl %eax,%ebp orl %ebp,%esi .align 4,0x90 -L017copy: +L018copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge L017copy + jge L018copy movl 24(%esp),%esp movl $1,%eax L000just_leave: diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm index 4987f6fe91..f026dae738 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm @@ -31,44 +31,51 @@ $L_bn_mul_mont_begin:: jl $L000just_leave lea esi,DWORD PTR 20[esp] lea edx,DWORD PTR 24[esp] - mov ebp,esp add edi,2 neg edi - lea esp,DWORD PTR [edi*4+esp-32] + lea ebp,DWORD PTR [edi*4+esp-32] neg edi - mov eax,esp + mov eax,ebp sub eax,edx and eax,2047 - sub esp,eax - xor edx,esp + sub ebp,eax + xor edx,ebp and edx,2048 xor edx,2048 - sub esp,edx - and esp,-64 - mov eax,ebp - sub eax,esp + sub ebp,edx + and ebp,-64 + mov eax,esp + sub eax,ebp and eax,-4096 + mov edx,esp + lea esp,DWORD PTR [eax*1+ebp] + mov eax,DWORD PTR [esp] + cmp esp,ebp + ja $L001page_walk + jmp $L002page_walk_done +ALIGN 16 $L001page_walk: - mov edx,DWORD PTR [eax*1+esp] - sub eax,4096 -DB 46 - jnc $L001page_walk + lea esp,DWORD PTR [esp-4096] + mov eax,DWORD PTR [esp] + cmp esp,ebp + ja $L001page_walk +$L002page_walk_done: mov eax,DWORD PTR [esi] mov ebx,DWORD PTR 4[esi] mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 12[esi] + mov ebp,DWORD PTR 12[esi] mov esi,DWORD PTR 16[esi] mov esi,DWORD PTR [esi] mov DWORD PTR 4[esp],eax mov DWORD PTR 8[esp],ebx mov DWORD PTR 12[esp],ecx - mov DWORD PTR 16[esp],edx + mov DWORD PTR 16[esp],ebp mov DWORD PTR 20[esp],esi lea ebx,DWORD PTR [edi-3] - mov DWORD PTR 24[esp],ebp + mov DWORD PTR 24[esp],edx lea eax,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [eax],26 - jnc $L002non_sse2 + jnc $L003non_sse2 mov eax,-1 movd mm7,eax mov esi,DWORD PTR 8[esp] @@ -92,7 +99,7 @@ DB 46 psrlq mm3,32 inc ecx ALIGN 16 -$L0031st: +$L0041st: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -107,7 +114,7 @@ $L0031st: psrlq mm3,32 lea ecx,DWORD PTR 1[ecx] cmp ecx,ebx - jl $L0031st + jl $L0041st pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -121,7 +128,7 @@ $L0031st: paddq mm3,mm2 movq QWORD PTR 32[ebx*4+esp],mm3 inc edx -$L004outer: +$L005outer: xor ecx,ecx movd mm4,DWORD PTR [edx*4+edi] movd mm5,DWORD PTR [esi] @@ -143,7 +150,7 @@ $L004outer: paddq mm2,mm6 inc ecx dec ebx -$L005inner: +$L006inner: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -160,7 +167,7 @@ $L005inner: paddq mm2,mm6 dec ebx lea ecx,DWORD PTR 1[ecx] - jnz $L005inner + jnz $L006inner mov ebx,ecx pmuludq mm0,mm4 pmuludq mm1,mm5 @@ -178,11 +185,11 @@ $L005inner: movq QWORD PTR 32[ebx*4+esp],mm3 lea edx,DWORD PTR 1[edx] cmp edx,ebx - jle $L004outer + jle $L005outer emms - jmp $L006common_tail + jmp $L007common_tail ALIGN 16 -$L002non_sse2: +$L003non_sse2: mov esi,DWORD PTR 8[esp] lea ebp,DWORD PTR 1[ebx] mov edi,DWORD PTR 12[esp] @@ -193,12 +200,12 @@ $L002non_sse2: lea eax,DWORD PTR 4[ebx*4+edi] or ebp,edx mov edi,DWORD PTR [edi] - jz $L007bn_sqr_mont + jz $L008bn_sqr_mont mov DWORD PTR 28[esp],eax mov eax,DWORD PTR [esi] xor edx,edx ALIGN 16 -$L008mull: +$L009mull: mov ebp,edx mul edi add ebp,eax @@ -207,7 +214,7 @@ $L008mull: mov eax,DWORD PTR [ecx*4+esi] cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L008mull + jl $L009mull mov ebp,edx mul edi mov edi,DWORD PTR 20[esp] @@ -225,9 +232,9 @@ $L008mull: mov eax,DWORD PTR 4[esi] adc edx,0 inc ecx - jmp $L0092ndmadd + jmp $L0102ndmadd ALIGN 16 -$L0101stmadd: +$L0111stmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -238,7 +245,7 @@ $L0101stmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L0101stmadd + jl $L0111stmadd mov ebp,edx mul edi add eax,DWORD PTR 32[ebx*4+esp] @@ -261,7 +268,7 @@ $L0101stmadd: adc edx,0 mov ecx,1 ALIGN 16 -$L0092ndmadd: +$L0102ndmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -272,7 +279,7 @@ $L0092ndmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0092ndmadd + jl $L0102ndmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -288,16 +295,16 @@ $L0092ndmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,DWORD PTR 28[esp] mov DWORD PTR 36[ebx*4+esp],eax - je $L006common_tail + je $L007common_tail mov edi,DWORD PTR [ecx] mov esi,DWORD PTR 8[esp] mov DWORD PTR 12[esp],ecx xor ecx,ecx xor edx,edx mov eax,DWORD PTR [esi] - jmp $L0101stmadd + jmp $L0111stmadd ALIGN 16 -$L007bn_sqr_mont: +$L008bn_sqr_mont: mov DWORD PTR [esp],ebx mov DWORD PTR 12[esp],ecx mov eax,edi @@ -308,7 +315,7 @@ $L007bn_sqr_mont: and ebx,1 inc ecx ALIGN 16 -$L011sqr: +$L012sqr: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -320,7 +327,7 @@ $L011sqr: cmp ecx,DWORD PTR [esp] mov ebx,eax mov DWORD PTR 28[ecx*4+esp],ebp - jl $L011sqr + jl $L012sqr mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -344,7 +351,7 @@ $L011sqr: mov eax,DWORD PTR 4[esi] mov ecx,1 ALIGN 16 -$L0123rdmadd: +$L0133rdmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -363,7 +370,7 @@ $L0123rdmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0123rdmadd + jl $L0133rdmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -379,7 +386,7 @@ $L0123rdmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,ebx mov DWORD PTR 36[ebx*4+esp],eax - je $L006common_tail + je $L007common_tail mov edi,DWORD PTR 4[ecx*4+esi] lea ecx,DWORD PTR 1[ecx] mov eax,edi @@ -391,12 +398,12 @@ $L0123rdmadd: xor ebp,ebp cmp ecx,ebx lea ecx,DWORD PTR 1[ecx] - je $L013sqrlast + je $L014sqrlast mov ebx,edx shr edx,1 and ebx,1 ALIGN 16 -$L014sqradd: +$L015sqradd: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -412,13 +419,13 @@ $L014sqradd: cmp ecx,DWORD PTR [esp] mov DWORD PTR 28[ecx*4+esp],ebp mov ebx,eax - jle $L014sqradd + jle $L015sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 -$L013sqrlast: +$L014sqrlast: mov edi,DWORD PTR 20[esp] mov esi,DWORD PTR 16[esp] imul edi,DWORD PTR 32[esp] @@ -433,9 +440,9 @@ $L013sqrlast: adc edx,0 mov ecx,1 mov eax,DWORD PTR 4[esi] - jmp $L0123rdmadd + jmp $L0133rdmadd ALIGN 16 -$L006common_tail: +$L007common_tail: mov ebp,DWORD PTR 16[esp] mov edi,DWORD PTR 4[esp] lea esi,DWORD PTR 32[esp] @@ -443,13 +450,13 @@ $L006common_tail: mov ecx,ebx xor edx,edx ALIGN 16 -$L015sub: +$L016sub: sbb eax,DWORD PTR [edx*4+ebp] mov DWORD PTR [edx*4+edi],eax dec ecx mov eax,DWORD PTR 4[edx*4+esi] lea edx,DWORD PTR 1[edx] - jge $L015sub + jge $L016sub sbb eax,0 and esi,eax not eax @@ -457,12 +464,12 @@ $L015sub: and ebp,eax or esi,ebp ALIGN 16 -$L016copy: +$L017copy: mov eax,DWORD PTR [ebx*4+esi] mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR 32[ebx*4+esp],ecx dec ebx - jge $L016copy + jge $L017copy mov esp,DWORD PTR 24[esp] mov eax,1 $L000just_leave: diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S index 9738ed5023..449e7a442e 100644 --- a/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S +++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S @@ -1816,8 +1816,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_6: - vst1.64 {q14}, [r0,:128] @ next round tweak - veor q4, q4, q12 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1853,8 +1851,6 @@ bsaes_xts_encrypt: .align 5 .Lxts_enc_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - veor q3, q3, q11 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1883,8 +1879,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - veor q2, q2, q10 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1910,8 +1904,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - veor q1, q1, q9 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1936,8 +1928,6 @@ bsaes_xts_encrypt: b .Lxts_enc_done .align 4 .Lxts_enc_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - veor q0, q0, q8 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -1960,7 +1950,7 @@ bsaes_xts_encrypt: .align 4 .Lxts_enc_1: mov r0, sp - veor q0, q8 + veor q0, q0, q8 mov r1, sp vst1.8 {q0}, [sp,:128] mov r2, r10 @@ -2346,8 +2336,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_5: - vst1.64 {q13}, [r0,:128] @ next round tweak - veor q3, q3, q11 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2376,8 +2364,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_4: - vst1.64 {q12}, [r0,:128] @ next round tweak - veor q2, q2, q10 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2403,8 +2389,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_3: - vst1.64 {q11}, [r0,:128] @ next round tweak - veor q1, q1, q9 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2429,8 +2413,6 @@ bsaes_xts_decrypt: b .Lxts_dec_done .align 4 .Lxts_dec_2: - vst1.64 {q10}, [r0,:128] @ next round tweak - veor q0, q0, q8 #ifndef BSAES_ASM_EXTENDED_KEY add r4, sp, #0x90 @ pass key schedule @@ -2453,12 +2435,12 @@ bsaes_xts_decrypt: .align 4 .Lxts_dec_1: mov r0, sp - veor q0, q8 + veor q0, q0, q8 mov r1, sp vst1.8 {q0}, [sp,:128] + mov r5, r2 @ preserve magic mov r2, r10 mov r4, r3 @ preserve fp - mov r5, r2 @ preserve magic bl AES_decrypt diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s index 0d36e3d473..865c2ef5cb 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s @@ -6,6 +6,8 @@ .type bn_mul_mont,@function .align 16 bn_mul_mont: + movl %r9d,%r9d + movq %rsp,%rax testl $3,%r9d jnz .Lmul_enter cmpl $8,%r9d @@ -25,29 +27,36 @@ bn_mul_mont: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 2(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -.Lmul_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.align 16 .Lmul_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc .Lmul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + movq %rax,8(%rsp,%r9,8) +.Lmul_body: movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -215,19 +224,21 @@ bn_mul_mont: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size bn_mul_mont,.-bn_mul_mont .type bn_mul4x_mont,@function .align 16 bn_mul4x_mont: + movl %r9d,%r9d + movq %rsp,%rax .Lmul4x_enter: pushq %rbx pushq %rbp @@ -236,23 +247,29 @@ bn_mul4x_mont: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 4(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -.Lmul4x_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + .Lmul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lmul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + movq %rax,8(%rsp,%r9,8) +.Lmul4x_body: movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -621,13 +638,13 @@ bn_mul4x_mont: movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lmul4x_epilogue: .byte 0xf3,0xc3 .size bn_mul4x_mont,.-bn_mul4x_mont @@ -636,14 +653,15 @@ bn_mul4x_mont: .type bn_sqr8x_mont,@function .align 32 bn_sqr8x_mont: -.Lsqr8x_enter: movq %rsp,%rax +.Lsqr8x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lsqr8x_prologue: movl %r9d,%r10d shll $3,%r9d @@ -656,33 +674,42 @@ bn_sqr8x_mont: leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp movq (%r8),%r8 subq %rsi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lsqr8x_sp_alt - subq %r11,%rsp - leaq -64(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp jmp .Lsqr8x_sp_done .align 32 .Lsqr8x_sp_alt: leaq 4096-64(,%r9,2),%r10 - leaq -64(%rsp,%r9,2),%rsp + leaq -64(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lsqr8x_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk + jmp .Lsqr8x_page_walk_done + +.align 16 .Lsqr8x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lsqr8x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk +.Lsqr8x_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s index a503f6bd8d..74ac8ee159 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s @@ -6,16 +6,15 @@ .type bn_mul_mont_gather5,@function .align 64 bn_mul_mont_gather5: + movl %r9d,%r9d + movq %rsp,%rax testl $7,%r9d jnz .Lmul_enter jmp .Lmul4x_enter .align 16 .Lmul_enter: - movl %r9d,%r9d - movq %rsp,%rax movd 8(%rsp),%xmm5 - leaq .Linc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 @@ -23,26 +22,36 @@ bn_mul_mont_gather5: pushq %r14 pushq %r15 - leaq 2(%r9),%r11 - negq %r11 - leaq -264(%rsp,%r11,8),%rsp - andq $-1024,%rsp + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + - movq %rax,8(%rsp,%r9,8) -.Lmul_body: + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done - subq %rsp,%rax - andq $-4096,%rax .Lmul_page_walk: - movq (%rsp,%rax,1),%r11 - subq $4096,%rax -.byte 0x2e - jnc .Lmul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + leaq .Linc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +.Lmul_body: leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 @@ -413,15 +422,16 @@ bn_mul_mont_gather5: .type bn_mul4x_mont_gather5,@function .align 32 bn_mul4x_mont_gather5: -.Lmul4x_enter: .byte 0x67 movq %rsp,%rax +.Lmul4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +.Lmul4x_prologue: .byte 0x67 shll $3,%r9d @@ -438,32 +448,40 @@ bn_mul4x_mont_gather5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lmul4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lmul4xsp_done .align 32 .Lmul4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lmul4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + .Lmul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lmul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: negq %r9 @@ -1022,6 +1040,7 @@ bn_power5: pushq %r13 pushq %r14 pushq %r15 +.Lpower5_prologue: shll $3,%r9d leal (%r9,%r9,2),%r10d @@ -1036,32 +1055,40 @@ bn_power5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lpwr_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lpwr_sp_done .align 32 .Lpwr_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lpwr_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk + jmp .Lpwr_page_walk_done + .Lpwr_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lpwr_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk +.Lpwr_page_walk_done: movq %r9,%r10 negq %r9 @@ -1972,6 +1999,7 @@ bn_from_mont8x: pushq %r13 pushq %r14 pushq %r15 +.Lfrom_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -1986,32 +2014,40 @@ bn_from_mont8x: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb .Lfrom_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp .Lfrom_sp_done .align 32 .Lfrom_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp .Lfrom_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lfrom_page_walk + jmp .Lfrom_page_walk_done + .Lfrom_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc .Lfrom_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lfrom_page_walk +.Lfrom_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s index 7876e38299..c7bc6f2636 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s @@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2: pushq %r13 movq 0(%rsi),%r8 + xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 @@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2: adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx - sbbq %r13,%r13 + adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx @@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 - cmovzq %rcx,%r10 - cmovzq %r12,%r11 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 @@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -217,14 +218,14 @@ ecp_nistz256_add: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -993,13 +994,14 @@ ecp_nistz256_avx2_select_w7: .type __ecp_nistz256_add_toq,@function .align 32 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1082,13 +1084,14 @@ __ecp_nistz256_subq: .type __ecp_nistz256_mul_by_2q,@function .align 32 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1333,16 +1336,14 @@ ecp_nistz256_point_add: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -1354,14 +1355,14 @@ ecp_nistz256_point_add: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) @@ -1372,8 +1373,8 @@ ecp_nistz256_point_add: call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -1556,6 +1557,7 @@ ecp_nistz256_point_add: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -1563,7 +1565,7 @@ ecp_nistz256_point_add: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1571,15 +1573,15 @@ ecp_nistz256_point_add: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -1733,16 +1735,14 @@ ecp_nistz256_point_add_affine: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -1860,6 +1860,7 @@ ecp_nistz256_point_add_affine: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -1867,7 +1868,7 @@ ecp_nistz256_point_add_affine: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1875,15 +1876,15 @@ ecp_nistz256_point_add_affine: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s index 38b7df1970..d2fbc15044 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s @@ -1255,9 +1255,9 @@ _shaext_shortcut: .align 16 .Loop_shaext: decq %rdx - leaq 64(%rsi),%rax + leaq 64(%rsi),%r8 paddd %xmm4,%xmm1 - cmovneq %rax,%rsi + cmovneq %r8,%rsi movdqa %xmm0,%xmm8 .byte 15,56,201,229 movdqa %xmm0,%xmm2 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s index a5b25ebb4b..7c59d9b730 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s @@ -6,6 +6,8 @@ .p2align 4 _bn_mul_mont: + movl %r9d,%r9d + movq %rsp,%rax testl $3,%r9d jnz L$mul_enter cmpl $8,%r9d @@ -25,29 +27,36 @@ L$mul_enter: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 2(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -L$mul_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done + +.p2align 4 L$mul_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x66,0x2e - jnc L$mul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + movq %rax,8(%rsp,%r9,8) +L$mul_body: movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -215,19 +224,21 @@ L$copy: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$mul_epilogue: .byte 0xf3,0xc3 .p2align 4 bn_mul4x_mont: + movl %r9d,%r9d + movq %rsp,%rax L$mul4x_enter: pushq %rbx pushq %rbp @@ -236,23 +247,29 @@ L$mul4x_enter: pushq %r14 pushq %r15 - movl %r9d,%r9d - leaq 4(%r9),%r10 + negq %r9 movq %rsp,%r11 - negq %r10 - leaq (%rsp,%r10,8),%rsp - andq $-1024,%rsp + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 - movq %r11,8(%rsp,%r9,8) -L$mul4x_body: - subq %rsp,%r11 + subq %r10,%r11 andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + L$mul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$mul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: + movq %rax,8(%rsp,%r9,8) +L$mul4x_body: movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -621,13 +638,13 @@ L$copy4x: movdqu %xmm2,16(%rdi,%r14,1) movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$mul4x_epilogue: .byte 0xf3,0xc3 @@ -636,14 +653,15 @@ L$mul4x_epilogue: .p2align 5 bn_sqr8x_mont: -L$sqr8x_enter: movq %rsp,%rax +L$sqr8x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$sqr8x_prologue: movl %r9d,%r10d shll $3,%r9d @@ -656,33 +674,42 @@ L$sqr8x_enter: leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp movq (%r8),%r8 subq %rsi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$sqr8x_sp_alt - subq %r11,%rsp - leaq -64(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp jmp L$sqr8x_sp_done .p2align 5 L$sqr8x_sp_alt: leaq 4096-64(,%r9,2),%r10 - leaq -64(%rsp,%r9,2),%rsp + leaq -64(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$sqr8x_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk + jmp L$sqr8x_page_walk_done + +.p2align 4 L$sqr8x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$sqr8x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$sqr8x_page_walk +L$sqr8x_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s index 8bb7c34c35..527abf5711 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s @@ -6,16 +6,15 @@ .p2align 6 _bn_mul_mont_gather5: + movl %r9d,%r9d + movq %rsp,%rax testl $7,%r9d jnz L$mul_enter jmp L$mul4x_enter .p2align 4 L$mul_enter: - movl %r9d,%r9d - movq %rsp,%rax movd 8(%rsp),%xmm5 - leaq L$inc(%rip),%r10 pushq %rbx pushq %rbp pushq %r12 @@ -23,26 +22,36 @@ L$mul_enter: pushq %r14 pushq %r15 - leaq 2(%r9),%r11 - negq %r11 - leaq -264(%rsp,%r11,8),%rsp - andq $-1024,%rsp + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + - movq %rax,8(%rsp,%r9,8) -L$mul_body: + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk + jmp L$mul_page_walk_done - subq %rsp,%rax - andq $-4096,%rax L$mul_page_walk: - movq (%rsp,%rax,1),%r11 - subq $4096,%rax -.byte 0x2e - jnc L$mul_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja L$mul_page_walk +L$mul_page_walk_done: + + leaq L$inc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +L$mul_body: leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 @@ -413,15 +422,16 @@ L$mul_epilogue: .p2align 5 bn_mul4x_mont_gather5: -L$mul4x_enter: .byte 0x67 movq %rsp,%rax +L$mul4x_enter: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 +L$mul4x_prologue: .byte 0x67 shll $3,%r9d @@ -438,32 +448,40 @@ L$mul4x_enter: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$mul4xsp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$mul4xsp_done .p2align 5 L$mul4xsp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$mul4xsp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk + jmp L$mul4x_page_walk_done + L$mul4x_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$mul4x_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$mul4x_page_walk +L$mul4x_page_walk_done: negq %r9 @@ -1022,6 +1040,7 @@ _bn_power5: pushq %r13 pushq %r14 pushq %r15 +L$power5_prologue: shll $3,%r9d leal (%r9,%r9,2),%r10d @@ -1036,32 +1055,40 @@ _bn_power5: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$pwr_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$pwr_sp_done .p2align 5 L$pwr_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$pwr_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk + jmp L$pwr_page_walk_done + L$pwr_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$pwr_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$pwr_page_walk +L$pwr_page_walk_done: movq %r9,%r10 negq %r9 @@ -1972,6 +1999,7 @@ bn_from_mont8x: pushq %r13 pushq %r14 pushq %r15 +L$from_prologue: shll $3,%r9d leaq (%r9,%r9,2),%r10 @@ -1986,32 +2014,40 @@ bn_from_mont8x: leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp subq %rdi,%r11 andq $4095,%r11 cmpq %r11,%r10 jb L$from_sp_alt - subq %r11,%rsp - leaq -320(%rsp,%r9,2),%rsp + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp jmp L$from_sp_done .p2align 5 L$from_sp_alt: leaq 4096-320(,%r9,2),%r10 - leaq -320(%rsp,%r9,2),%rsp + leaq -320(%rbp,%r9,2),%rbp subq %r10,%r11 movq $0,%r10 cmovcq %r10,%r11 - subq %r11,%rsp + subq %r11,%rbp L$from_sp_done: - andq $-64,%rsp - movq %rax,%r11 - subq %rsp,%r11 + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$from_page_walk + jmp L$from_page_walk_done + L$from_page_walk: - movq (%rsp,%r11,1),%r10 - subq $4096,%r11 -.byte 0x2e - jnc L$from_page_walk + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja L$from_page_walk +L$from_page_walk_done: movq %r9,%r10 negq %r9 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s index 30456b900f..62e8d00ccd 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s @@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2: pushq %r13 movq 0(%rsi),%r8 + xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 @@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2: adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx - sbbq %r13,%r13 + adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx @@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq L$poly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 - cmovzq %rcx,%r10 - cmovzq %r12,%r11 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 @@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq L$poly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -217,14 +218,14 @@ _ecp_nistz256_add: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -993,13 +994,14 @@ _ecp_nistz256_avx2_select_w7: .p2align 5 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1082,13 +1084,14 @@ __ecp_nistz256_subq: .p2align 5 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1333,16 +1336,14 @@ _ecp_nistz256_point_add: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -1354,14 +1355,14 @@ _ecp_nistz256_point_add: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) @@ -1372,8 +1373,8 @@ _ecp_nistz256_point_add: call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -1556,6 +1557,7 @@ L$add_proceedq: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -1563,7 +1565,7 @@ L$add_proceedq: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1571,15 +1573,15 @@ L$add_proceedq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -1733,16 +1735,14 @@ _ecp_nistz256_point_add_affine: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -1860,6 +1860,7 @@ _ecp_nistz256_point_add_affine: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -1867,7 +1868,7 @@ _ecp_nistz256_point_add_affine: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1875,15 +1876,15 @@ _ecp_nistz256_point_add_affine: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s index 671034cdaf..47c5f633cd 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s @@ -1255,9 +1255,9 @@ _shaext_shortcut: .p2align 4 L$oop_shaext: decq %rdx - leaq 64(%rsi),%rax + leaq 64(%rsi),%r8 paddd %xmm4,%xmm1 - cmovneq %rax,%rsi + cmovneq %r8,%rsi movdqa %xmm0,%xmm8 .byte 15,56,201,229 movdqa %xmm0,%xmm2 diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm index ed588a016b..2b46716247 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm @@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp test r9d,3 jnz $L$mul_enter cmp r9d,8 @@ -38,29 +40,36 @@ $L$mul_enter:: push r14 push r15 - mov r9d,r9d - lea r10,QWORD PTR[2+r9] + neg r9 mov r11,rsp - neg r10 - lea rsp,QWORD PTR[r10*8+rsp] - and rsp,-1024 + lea r10,QWORD PTR[((-16))+r9*8+rsp] + neg r9 + and r10,-1024 - mov QWORD PTR[8+r9*8+rsp],r11 -$L$mul_body:: - sub r11,rsp + sub r11,r10 and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk + jmp $L$mul_page_walk_done + +ALIGN 16 $L$mul_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 066h,02eh - jnc $L$mul_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk +$L$mul_page_walk_done:: + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: mov r12,rdx mov r8,QWORD PTR[r8] mov rbx,QWORD PTR[r12] @@ -228,13 +237,13 @@ $L$copy:: mov rsi,QWORD PTR[8+r9*8+rsp] mov rax,1 - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$mul_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -256,6 +265,8 @@ $L$SEH_begin_bn_mul4x_mont:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp $L$mul4x_enter:: push rbx push rbp @@ -264,23 +275,29 @@ $L$mul4x_enter:: push r14 push r15 - mov r9d,r9d - lea r10,QWORD PTR[4+r9] + neg r9 mov r11,rsp - neg r10 - lea rsp,QWORD PTR[r10*8+rsp] - and rsp,-1024 + lea r10,QWORD PTR[((-32))+r9*8+rsp] + neg r9 + and r10,-1024 - mov QWORD PTR[8+r9*8+rsp],r11 -$L$mul4x_body:: - sub r11,rsp + sub r11,r10 and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul4x_page_walk + jmp $L$mul4x_page_walk_done + $L$mul4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$mul4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul4x_page_walk +$L$mul4x_page_walk_done:: + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul4x_body:: mov QWORD PTR[16+r9*8+rsp],rdi mov r12,rdx mov r8,QWORD PTR[r8] @@ -649,13 +666,13 @@ $L$copy4x:: movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 mov rsi,QWORD PTR[8+r9*8+rsp] mov rax,1 - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$mul4x_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -679,14 +696,15 @@ $L$SEH_begin_bn_sqr8x_mont:: mov r9,QWORD PTR[48+rsp] -$L$sqr8x_enter:: mov rax,rsp +$L$sqr8x_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$sqr8x_prologue:: mov r10d,r9d shl r9d,3 @@ -699,33 +717,42 @@ $L$sqr8x_enter:: lea r11,QWORD PTR[((-64))+r9*2+rsp] + mov rbp,rsp mov r8,QWORD PTR[r8] sub r11,rsi and r11,4095 cmp r10,r11 jb $L$sqr8x_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-64))+r9*2+rbp] jmp $L$sqr8x_sp_done ALIGN 32 $L$sqr8x_sp_alt:: lea r10,QWORD PTR[((4096-64))+r9*2] - lea rsp,QWORD PTR[((-64))+r9*2+rsp] + lea rbp,QWORD PTR[((-64))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$sqr8x_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$sqr8x_page_walk + jmp $L$sqr8x_page_walk_done + +ALIGN 16 $L$sqr8x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$sqr8x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$sqr8x_page_walk +$L$sqr8x_page_walk_done:: mov r10,r9 neg r9 @@ -860,22 +887,8 @@ mul_handler PROC PRIVATE mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] - lea rax,QWORD PTR[48+rax] - mov rbx,QWORD PTR[((-8))+rax] - mov rbp,QWORD PTR[((-16))+rax] - mov r12,QWORD PTR[((-24))+rax] - mov r13,QWORD PTR[((-32))+rax] - mov r14,QWORD PTR[((-40))+rax] - mov r15,QWORD PTR[((-48))+rax] - mov QWORD PTR[144+r8],rbx - mov QWORD PTR[160+r8],rbp - mov QWORD PTR[216+r8],r12 - mov QWORD PTR[224+r8],r13 - mov QWORD PTR[232+r8],r14 - mov QWORD PTR[240+r8],r15 - - jmp $L$common_seh_tail + jmp $L$common_pop_regs mul_handler ENDP @@ -903,15 +916,21 @@ sqr_handler PROC PRIVATE cmp rbx,r10 jb $L$common_seh_tail + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_pop_regs + mov rax,QWORD PTR[152+r8] - mov r10d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail mov rax,QWORD PTR[40+rax] +$L$common_pop_regs:: mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] @@ -993,7 +1012,8 @@ DB 9,0,0,0 $L$SEH_info_bn_sqr8x_mont:: DB 9,0,0,0 DD imagerel sqr_handler - DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue + DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue +ALIGN 8 .xdata ENDS END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm index fb3c27a0ff..89f45a4915 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm @@ -19,16 +19,15 @@ $L$SEH_begin_bn_mul_mont_gather5:: mov r9,QWORD PTR[48+rsp] + mov r9d,r9d + mov rax,rsp test r9d,7 jnz $L$mul_enter jmp $L$mul4x_enter ALIGN 16 $L$mul_enter:: - mov r9d,r9d - mov rax,rsp movd xmm5,DWORD PTR[56+rsp] - lea r10,QWORD PTR[$L$inc] push rbx push rbp push r12 @@ -36,26 +35,36 @@ $L$mul_enter:: push r14 push r15 - lea r11,QWORD PTR[2+r9] - neg r11 - lea rsp,QWORD PTR[((-264))+r11*8+rsp] - and rsp,-1024 + neg r9 + mov r11,rsp + lea r10,QWORD PTR[((-280))+r9*8+rsp] + neg r9 + and r10,-1024 + - mov QWORD PTR[8+r9*8+rsp],rax -$L$mul_body:: + sub r11,r10 + and r11,-4096 + lea rsp,QWORD PTR[r11*1+r10] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk + jmp $L$mul_page_walk_done - sub rax,rsp - and rax,-4096 $L$mul_page_walk:: - mov r11,QWORD PTR[rax*1+rsp] - sub rax,4096 -DB 02eh - jnc $L$mul_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r11,QWORD PTR[rsp] + cmp rsp,r10 + ja $L$mul_page_walk +$L$mul_page_walk_done:: + + lea r10,QWORD PTR[$L$inc] + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: lea r12,QWORD PTR[128+rdx] movdqa xmm0,XMMWORD PTR[r10] @@ -441,15 +450,16 @@ $L$SEH_begin_bn_mul4x_mont_gather5:: mov r9,QWORD PTR[48+rsp] -$L$mul4x_enter:: DB 067h mov rax,rsp +$L$mul4x_enter:: push rbx push rbp push r12 push r13 push r14 push r15 +$L$mul4x_prologue:: DB 067h shl r9d,3 @@ -466,32 +476,40 @@ DB 067h lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$mul4xsp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$mul4xsp_done ALIGN 32 $L$mul4xsp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$mul4xsp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mul4x_page_walk + jmp $L$mul4x_page_walk_done + $L$mul4x_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$mul4x_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$mul4x_page_walk +$L$mul4x_page_walk_done:: neg r9 @@ -1065,6 +1083,7 @@ $L$SEH_begin_bn_power5:: push r13 push r14 push r15 +$L$power5_prologue:: shl r9d,3 lea r10d,DWORD PTR[r9*2+r9] @@ -1079,32 +1098,40 @@ $L$SEH_begin_bn_power5:: lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$pwr_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$pwr_sp_done ALIGN 32 $L$pwr_sp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$pwr_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwr_page_walk + jmp $L$pwr_page_walk_done + $L$pwr_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$pwr_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$pwr_page_walk +$L$pwr_page_walk_done:: mov r10,r9 neg r9 @@ -2030,6 +2057,7 @@ DB 067h push r13 push r14 push r15 +$L$from_prologue:: shl r9d,3 lea r10,QWORD PTR[r9*2+r9] @@ -2044,32 +2072,40 @@ DB 067h lea r11,QWORD PTR[((-320))+r9*2+rsp] + mov rbp,rsp sub r11,rdi and r11,4095 cmp r10,r11 jb $L$from_sp_alt - sub rsp,r11 - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + sub rbp,r11 + lea rbp,QWORD PTR[((-320))+r9*2+rbp] jmp $L$from_sp_done ALIGN 32 $L$from_sp_alt:: lea r10,QWORD PTR[((4096-320))+r9*2] - lea rsp,QWORD PTR[((-320))+r9*2+rsp] + lea rbp,QWORD PTR[((-320))+r9*2+rbp] sub r11,r10 mov r10,0 cmovc r11,r10 - sub rsp,r11 + sub rbp,r11 $L$from_sp_done:: - and rsp,-64 - mov r11,rax - sub r11,rsp + and rbp,-64 + mov r11,rsp + sub r11,rbp and r11,-4096 + lea rsp,QWORD PTR[rbp*1+r11] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$from_page_walk + jmp $L$from_page_walk_done + $L$from_page_walk:: - mov r10,QWORD PTR[r11*1+rsp] - sub r11,4096 -DB 02eh - jnc $L$from_page_walk + lea rsp,QWORD PTR[((-4096))+rsp] + mov r10,QWORD PTR[rsp] + cmp rsp,rbp + ja $L$from_page_walk +$L$from_page_walk_done:: mov r10,r9 neg r9 @@ -2383,9 +2419,14 @@ mul_handler PROC PRIVATE cmp rbx,r10 jb $L$common_seh_tail + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_pop_regs + mov rax,QWORD PTR[152+r8] - mov r10d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail @@ -2397,11 +2438,11 @@ mul_handler PROC PRIVATE mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] - jmp $L$body_proceed + jmp $L$common_pop_regs $L$body_40:: mov rax,QWORD PTR[40+rax] -$L$body_proceed:: +$L$common_pop_regs:: mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] @@ -2483,22 +2524,22 @@ ALIGN 8 $L$SEH_info_bn_mul_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul_body,imagerel $L$mul_epilogue + DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue ALIGN 8 $L$SEH_info_bn_mul4x_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue + DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue ALIGN 8 $L$SEH_info_bn_power5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$power5_body,imagerel $L$power5_epilogue + DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue ALIGN 8 $L$SEH_info_bn_from_mont8x:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$from_body,imagerel $L$from_epilogue + DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue ALIGN 8 $L$SEH_info_bn_gather5:: DB 001h,00bh,003h,00ah diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm index ca78bd52cc..c985159a7b 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm @@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: push r13 mov r8,QWORD PTR[rsi] + xor r13,r13 mov r9,QWORD PTR[8+rsi] add r8,r8 mov r10,QWORD PTR[16+rsi] @@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: adc r10,r10 adc r11,r11 mov rdx,r9 - sbb r13,r13 + adc r13,0 sub r8,QWORD PTR[rsi] mov rcx,r10 @@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:: sbb r10,QWORD PTR[16+rsi] mov r12,r11 sbb r11,QWORD PTR[24+rsi] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:: sbb r10,0 mov r12,r11 sbb r11,QWORD PTR[(($L$poly+24))] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx - cmovz r10,rcx - cmovz r11,r12 + cmovc r8,rax + cmovc r9,rdx + cmovc r10,rcx + cmovc r11,r12 xor r13,r13 add r8,QWORD PTR[rsi] @@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:: sbb r10,0 mov r12,r11 sbb r11,QWORD PTR[(($L$poly+24))] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add:: sbb r10,QWORD PTR[16+rsi] mov r12,r11 sbb r11,QWORD PTR[24+rsi] - test r13,r13 + sbb r13,0 - cmovz r8,rax - cmovz r9,rdx + cmovc r8,rax + cmovc r9,rdx mov QWORD PTR[rdi],r8 - cmovz r10,rcx + cmovc r10,rcx mov QWORD PTR[8+rdi],r9 - cmovz r11,r12 + cmovc r11,r12 mov QWORD PTR[16+rdi],r10 mov QWORD PTR[24+rdi],r11 @@ -1167,13 +1168,14 @@ ecp_nistz256_avx2_select_w7 ENDP ALIGN 32 __ecp_nistz256_add_toq PROC PRIVATE + xor r11,r11 add r12,QWORD PTR[rbx] adc r13,QWORD PTR[8+rbx] mov rax,r12 adc r8,QWORD PTR[16+rbx] adc r9,QWORD PTR[24+rbx] mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -1181,14 +1183,14 @@ __ecp_nistz256_add_toq PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax - cmovz r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovz r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovz r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -1256,13 +1258,14 @@ __ecp_nistz256_subq ENDP ALIGN 32 __ecp_nistz256_mul_by_2q PROC PRIVATE + xor r11,r11 add r12,r12 adc r13,r13 mov rax,r12 adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -1270,14 +1273,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax - cmovz r13,rbp + cmovc r12,rax + cmovc r13,rbp mov QWORD PTR[rdi],r12 - cmovz r8,rcx + cmovc r8,rcx mov QWORD PTR[8+rdi],r13 - cmovz r9,r10 + cmovc r9,r10 mov QWORD PTR[16+rdi],r8 mov QWORD PTR[24+rdi],r9 @@ -1527,16 +1530,14 @@ $L$SEH_begin_ecp_nistz256_point_add:: mov rsi,rdx movdqa XMMWORD PTR[384+rsp],xmm0 movdqa XMMWORD PTR[(384+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[416+rsp],xmm2 movdqa XMMWORD PTR[(416+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[448+rsp],xmm4 movdqa XMMWORD PTR[(448+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rsi] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rsi] movdqu xmm2,XMMWORD PTR[32+rsi] por xmm5,xmm3 @@ -1548,14 +1549,14 @@ $L$SEH_begin_ecp_nistz256_point_add:: movdqa XMMWORD PTR[480+rsp],xmm0 pshufd xmm4,xmm5,01eh movdqa XMMWORD PTR[(480+16)+rsp],xmm1 - por xmm1,xmm0 -DB 102,72,15,110,199 + movdqu xmm0,XMMWORD PTR[64+rsi] + movdqu xmm1,XMMWORD PTR[80+rsi] movdqa XMMWORD PTR[512+rsp],xmm2 movdqa XMMWORD PTR[(512+16)+rsp],xmm3 - por xmm3,xmm2 por xmm5,xmm4 pxor xmm4,xmm4 - por xmm3,xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 lea rsi,QWORD PTR[((64-0))+rsi] mov QWORD PTR[((544+0))+rsp],rax @@ -1566,8 +1567,8 @@ DB 102,72,15,110,199 call __ecp_nistz256_sqr_montq pcmpeqd xmm5,xmm4 - pshufd xmm4,xmm3,1h - por xmm4,xmm3 + pshufd xmm4,xmm1,1h + por xmm4,xmm1 pshufd xmm5,xmm5,0 pshufd xmm3,xmm4,01eh por xmm4,xmm3 @@ -1750,6 +1751,7 @@ $L$add_proceedq:: + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[96+rsp] adc r13,r13 @@ -1757,7 +1759,7 @@ $L$add_proceedq:: adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -1765,15 +1767,15 @@ $L$add_proceedq:: sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subq @@ -1939,16 +1941,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine:: mov r8,QWORD PTR[((64+24))+rsi] movdqa XMMWORD PTR[320+rsp],xmm0 movdqa XMMWORD PTR[(320+16)+rsp],xmm1 - por xmm1,xmm0 movdqa XMMWORD PTR[352+rsp],xmm2 movdqa XMMWORD PTR[(352+16)+rsp],xmm3 - por xmm3,xmm2 movdqa XMMWORD PTR[384+rsp],xmm4 movdqa XMMWORD PTR[(384+16)+rsp],xmm5 - por xmm3,xmm1 + por xmm5,xmm4 movdqu xmm0,XMMWORD PTR[rbx] - pshufd xmm5,xmm3,1h + pshufd xmm3,xmm5,1h movdqu xmm1,XMMWORD PTR[16+rbx] movdqu xmm2,XMMWORD PTR[32+rbx] por xmm5,xmm3 @@ -2066,6 +2066,7 @@ DB 102,72,15,110,199 + xor r11,r11 add r12,r12 lea rsi,QWORD PTR[192+rsp] adc r13,r13 @@ -2073,7 +2074,7 @@ DB 102,72,15,110,199 adc r8,r8 adc r9,r9 mov rbp,r13 - sbb r11,r11 + adc r11,0 sub r12,-1 mov rcx,r8 @@ -2081,15 +2082,15 @@ DB 102,72,15,110,199 sbb r8,0 mov r10,r9 sbb r9,r15 - test r11,r11 + sbb r11,0 - cmovz r12,rax + cmovc r12,rax mov rax,QWORD PTR[rsi] - cmovz r13,rbp + cmovc r13,rbp mov rbp,QWORD PTR[8+rsi] - cmovz r8,rcx + cmovc r8,rcx mov rcx,QWORD PTR[16+rsi] - cmovz r9,r10 + cmovc r9,r10 mov r10,QWORD PTR[24+rsi] call __ecp_nistz256_subq diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm index 07b7882a96..7528c8a8a6 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm @@ -1283,9 +1283,9 @@ DB 102,15,56,0,251 ALIGN 16 $L$oop_shaext:: dec rdx - lea rax,QWORD PTR[64+rsi] + lea r8,QWORD PTR[64+rsi] paddd xmm1,xmm4 - cmovne rsi,rax + cmovne rsi,r8 movdqa xmm8,xmm0 DB 15,56,201,229 movdqa xmm2,xmm0 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s index 2f7211d92e..b683577231 100644 --- a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s @@ -15,44 +15,51 @@ bn_mul_mont: jl .L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx - movl %esp,%ebp addl $2,%edi negl %edi - leal -32(%esp,%edi,4),%esp + leal -32(%esp,%edi,4),%ebp negl %edi - movl %esp,%eax + movl %ebp,%eax subl %edx,%eax andl $2047,%eax - subl %eax,%esp - xorl %esp,%edx + subl %eax,%ebp + xorl %ebp,%edx andl $2048,%edx xorl $2048,%edx - subl %edx,%esp - andl $-64,%esp - movl %ebp,%eax - subl %esp,%eax + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk + jmp .L002page_walk_done +.align 16 .L001page_walk: - movl (%esp,%eax,1),%edx - subl $4096,%eax -.byte 46 - jnc .L001page_walk + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja .L001page_walk +.L002page_walk_done: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx - movl 12(%esi),%edx + movl 12(%esi),%ebp movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) - movl %edx,16(%esp) + movl %ebp,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx - movl %ebp,24(%esp) + movl %edx,24(%esp) leal OPENSSL_ia32cap_P,%eax btl $26,(%eax) - jnc .L002non_sse2 + jnc .L003non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi @@ -76,7 +83,7 @@ bn_mul_mont: psrlq $32,%mm3 incl %ecx .align 16 -.L0031st: +.L0041st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -91,7 +98,7 @@ bn_mul_mont: psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx - jl .L0031st + jl .L0041st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -105,7 +112,7 @@ bn_mul_mont: paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx -.L004outer: +.L005outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 @@ -127,7 +134,7 @@ bn_mul_mont: paddq %mm6,%mm2 incl %ecx decl %ebx -.L005inner: +.L006inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -144,7 +151,7 @@ bn_mul_mont: paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx - jnz .L005inner + jnz .L006inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 @@ -162,11 +169,11 @@ bn_mul_mont: movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx - jle .L004outer + jle .L005outer emms - jmp .L006common_tail + jmp .L007common_tail .align 16 -.L002non_sse2: +.L003non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -177,12 +184,12 @@ bn_mul_mont: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz .L007bn_sqr_mont + jz .L008bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 -.L008mull: +.L009mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -191,7 +198,7 @@ bn_mul_mont: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L008mull + jl .L009mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -209,9 +216,9 @@ bn_mul_mont: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp .L0092ndmadd + jmp .L0102ndmadd .align 16 -.L0101stmadd: +.L0111stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -222,7 +229,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L0101stmadd + jl .L0111stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -245,7 +252,7 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx .align 16 -.L0092ndmadd: +.L0102ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -256,7 +263,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0092ndmadd + jl .L0102ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -272,16 +279,16 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je .L006common_tail + je .L007common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp .L0101stmadd + jmp .L0111stmadd .align 16 -.L007bn_sqr_mont: +.L008bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -292,7 +299,7 @@ bn_mul_mont: andl $1,%ebx incl %ecx .align 16 -.L011sqr: +.L012sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -304,7 +311,7 @@ bn_mul_mont: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl .L011sqr + jl .L012sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -328,7 +335,7 @@ bn_mul_mont: movl 4(%esi),%eax movl $1,%ecx .align 16 -.L0123rdmadd: +.L0133rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -347,7 +354,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0123rdmadd + jl .L0133rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -363,7 +370,7 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je .L006common_tail + je .L007common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -375,12 +382,12 @@ bn_mul_mont: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je .L013sqrlast + je .L014sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 -.L014sqradd: +.L015sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -396,13 +403,13 @@ bn_mul_mont: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle .L014sqradd + jle .L015sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -.L013sqrlast: +.L014sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -417,9 +424,9 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp .L0123rdmadd + jmp .L0133rdmadd .align 16 -.L006common_tail: +.L007common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -427,13 +434,13 @@ bn_mul_mont: movl %ebx,%ecx xorl %edx,%edx .align 16 -.L015sub: +.L016sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge .L015sub + jge .L016sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -441,12 +448,12 @@ bn_mul_mont: andl %eax,%ebp orl %ebp,%esi .align 16 -.L016copy: +.L017copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge .L016copy + jge .L017copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s index accec0e519..7bc58d24e0 100644 --- a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s @@ -14,47 +14,54 @@ L_bn_mul_mont_begin: jl L000just_leave leal 20(%esp),%esi leal 24(%esp),%edx - movl %esp,%ebp addl $2,%edi negl %edi - leal -32(%esp,%edi,4),%esp + leal -32(%esp,%edi,4),%ebp negl %edi - movl %esp,%eax + movl %ebp,%eax subl %edx,%eax andl $2047,%eax - subl %eax,%esp - xorl %esp,%edx + subl %eax,%ebp + xorl %ebp,%edx andl $2048,%edx xorl $2048,%edx - subl %edx,%esp - andl $-64,%esp - movl %ebp,%eax - subl %esp,%eax + subl %edx,%ebp + andl $-64,%ebp + movl %esp,%eax + subl %ebp,%eax andl $-4096,%eax + movl %esp,%edx + leal (%ebp,%eax,1),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk + jmp L002page_walk_done +.align 4,0x90 L001page_walk: - movl (%esp,%eax,1),%edx - subl $4096,%eax -.byte 46 - jnc L001page_walk + leal -4096(%esp),%esp + movl (%esp),%eax + cmpl %ebp,%esp + ja L001page_walk +L002page_walk_done: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx - movl 12(%esi),%edx + movl 12(%esi),%ebp movl 16(%esi),%esi movl (%esi),%esi movl %eax,4(%esp) movl %ebx,8(%esp) movl %ecx,12(%esp) - movl %edx,16(%esp) + movl %ebp,16(%esp) movl %esi,20(%esp) leal -3(%edi),%ebx - movl %ebp,24(%esp) - call L002PIC_me_up -L002PIC_me_up: + movl %edx,24(%esp) + call L003PIC_me_up +L003PIC_me_up: popl %eax - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax btl $26,(%eax) - jnc L003non_sse2 + jnc L004non_sse2 movl $-1,%eax movd %eax,%mm7 movl 8(%esp),%esi @@ -78,7 +85,7 @@ L002PIC_me_up: psrlq $32,%mm3 incl %ecx .align 4,0x90 -L0041st: +L0051st: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -93,7 +100,7 @@ L0041st: psrlq $32,%mm3 leal 1(%ecx),%ecx cmpl %ebx,%ecx - jl L0041st + jl L0051st pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -107,7 +114,7 @@ L0041st: paddq %mm2,%mm3 movq %mm3,32(%esp,%ebx,4) incl %edx -L005outer: +L006outer: xorl %ecx,%ecx movd (%edi,%edx,4),%mm4 movd (%esi),%mm5 @@ -129,7 +136,7 @@ L005outer: paddq %mm6,%mm2 incl %ecx decl %ebx -L006inner: +L007inner: pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 paddq %mm0,%mm2 @@ -146,7 +153,7 @@ L006inner: paddq %mm6,%mm2 decl %ebx leal 1(%ecx),%ecx - jnz L006inner + jnz L007inner movl %ecx,%ebx pmuludq %mm4,%mm0 pmuludq %mm5,%mm1 @@ -164,11 +171,11 @@ L006inner: movq %mm3,32(%esp,%ebx,4) leal 1(%edx),%edx cmpl %ebx,%edx - jle L005outer + jle L006outer emms - jmp L007common_tail + jmp L008common_tail .align 4,0x90 -L003non_sse2: +L004non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -179,12 +186,12 @@ L003non_sse2: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz L008bn_sqr_mont + jz L009bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 4,0x90 -L009mull: +L010mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -193,7 +200,7 @@ L009mull: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L009mull + jl L010mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -211,9 +218,9 @@ L009mull: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp L0102ndmadd + jmp L0112ndmadd .align 4,0x90 -L0111stmadd: +L0121stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -224,7 +231,7 @@ L0111stmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L0111stmadd + jl L0121stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -247,7 +254,7 @@ L0111stmadd: adcl $0,%edx movl $1,%ecx .align 4,0x90 -L0102ndmadd: +L0112ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -258,7 +265,7 @@ L0102ndmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0102ndmadd + jl L0112ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -274,16 +281,16 @@ L0102ndmadd: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je L007common_tail + je L008common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp L0111stmadd + jmp L0121stmadd .align 4,0x90 -L008bn_sqr_mont: +L009bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -294,7 +301,7 @@ L008bn_sqr_mont: andl $1,%ebx incl %ecx .align 4,0x90 -L012sqr: +L013sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -306,7 +313,7 @@ L012sqr: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl L012sqr + jl L013sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -330,7 +337,7 @@ L012sqr: movl 4(%esi),%eax movl $1,%ecx .align 4,0x90 -L0133rdmadd: +L0143rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -349,7 +356,7 @@ L0133rdmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0133rdmadd + jl L0143rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -365,7 +372,7 @@ L0133rdmadd: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je L007common_tail + je L008common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -377,12 +384,12 @@ L0133rdmadd: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je L014sqrlast + je L015sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 4,0x90 -L015sqradd: +L016sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -398,13 +405,13 @@ L015sqradd: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle L015sqradd + jle L016sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -L014sqrlast: +L015sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -419,9 +426,9 @@ L014sqrlast: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp L0133rdmadd + jmp L0143rdmadd .align 4,0x90 -L007common_tail: +L008common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -429,13 +436,13 @@ L007common_tail: movl %ebx,%ecx xorl %edx,%edx .align 4,0x90 -L016sub: +L017sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge L016sub + jge L017sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -443,12 +450,12 @@ L016sub: andl %eax,%ebp orl %ebp,%esi .align 4,0x90 -L017copy: +L018copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge L017copy + jge L018copy movl 24(%esp),%esp movl $1,%eax L000just_leave: diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm index 4987f6fe91..f026dae738 100644 --- a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm +++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm @@ -31,44 +31,51 @@ $L_bn_mul_mont_begin:: jl $L000just_leave lea esi,DWORD PTR 20[esp] lea edx,DWORD PTR 24[esp] - mov ebp,esp add edi,2 neg edi - lea esp,DWORD PTR [edi*4+esp-32] + lea ebp,DWORD PTR [edi*4+esp-32] neg edi - mov eax,esp + mov eax,ebp sub eax,edx and eax,2047 - sub esp,eax - xor edx,esp + sub ebp,eax + xor edx,ebp and edx,2048 xor edx,2048 - sub esp,edx - and esp,-64 - mov eax,ebp - sub eax,esp + sub ebp,edx + and ebp,-64 + mov eax,esp + sub eax,ebp and eax,-4096 + mov edx,esp + lea esp,DWORD PTR [eax*1+ebp] + mov eax,DWORD PTR [esp] + cmp esp,ebp + ja $L001page_walk + jmp $L002page_walk_done +ALIGN 16 $L001page_walk: - mov edx,DWORD PTR [eax*1+esp] - sub eax,4096 -DB 46 - jnc $L001page_walk + lea esp,DWORD PTR [esp-4096] + mov eax,DWORD PTR [esp] + cmp esp,ebp + ja $L001page_walk +$L002page_walk_done: mov eax,DWORD PTR [esi] mov ebx,DWORD PTR 4[esi] mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 12[esi] + mov ebp,DWORD PTR 12[esi] mov esi,DWORD PTR 16[esi] mov esi,DWORD PTR [esi] mov DWORD PTR 4[esp],eax mov DWORD PTR 8[esp],ebx mov DWORD PTR 12[esp],ecx - mov DWORD PTR 16[esp],edx + mov DWORD PTR 16[esp],ebp mov DWORD PTR 20[esp],esi lea ebx,DWORD PTR [edi-3] - mov DWORD PTR 24[esp],ebp + mov DWORD PTR 24[esp],edx lea eax,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [eax],26 - jnc $L002non_sse2 + jnc $L003non_sse2 mov eax,-1 movd mm7,eax mov esi,DWORD PTR 8[esp] @@ -92,7 +99,7 @@ DB 46 psrlq mm3,32 inc ecx ALIGN 16 -$L0031st: +$L0041st: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -107,7 +114,7 @@ $L0031st: psrlq mm3,32 lea ecx,DWORD PTR 1[ecx] cmp ecx,ebx - jl $L0031st + jl $L0041st pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -121,7 +128,7 @@ $L0031st: paddq mm3,mm2 movq QWORD PTR 32[ebx*4+esp],mm3 inc edx -$L004outer: +$L005outer: xor ecx,ecx movd mm4,DWORD PTR [edx*4+edi] movd mm5,DWORD PTR [esi] @@ -143,7 +150,7 @@ $L004outer: paddq mm2,mm6 inc ecx dec ebx -$L005inner: +$L006inner: pmuludq mm0,mm4 pmuludq mm1,mm5 paddq mm2,mm0 @@ -160,7 +167,7 @@ $L005inner: paddq mm2,mm6 dec ebx lea ecx,DWORD PTR 1[ecx] - jnz $L005inner + jnz $L006inner mov ebx,ecx pmuludq mm0,mm4 pmuludq mm1,mm5 @@ -178,11 +185,11 @@ $L005inner: movq QWORD PTR 32[ebx*4+esp],mm3 lea edx,DWORD PTR 1[edx] cmp edx,ebx - jle $L004outer + jle $L005outer emms - jmp $L006common_tail + jmp $L007common_tail ALIGN 16 -$L002non_sse2: +$L003non_sse2: mov esi,DWORD PTR 8[esp] lea ebp,DWORD PTR 1[ebx] mov edi,DWORD PTR 12[esp] @@ -193,12 +200,12 @@ $L002non_sse2: lea eax,DWORD PTR 4[ebx*4+edi] or ebp,edx mov edi,DWORD PTR [edi] - jz $L007bn_sqr_mont + jz $L008bn_sqr_mont mov DWORD PTR 28[esp],eax mov eax,DWORD PTR [esi] xor edx,edx ALIGN 16 -$L008mull: +$L009mull: mov ebp,edx mul edi add ebp,eax @@ -207,7 +214,7 @@ $L008mull: mov eax,DWORD PTR [ecx*4+esi] cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L008mull + jl $L009mull mov ebp,edx mul edi mov edi,DWORD PTR 20[esp] @@ -225,9 +232,9 @@ $L008mull: mov eax,DWORD PTR 4[esi] adc edx,0 inc ecx - jmp $L0092ndmadd + jmp $L0102ndmadd ALIGN 16 -$L0101stmadd: +$L0111stmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -238,7 +245,7 @@ $L0101stmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L0101stmadd + jl $L0111stmadd mov ebp,edx mul edi add eax,DWORD PTR 32[ebx*4+esp] @@ -261,7 +268,7 @@ $L0101stmadd: adc edx,0 mov ecx,1 ALIGN 16 -$L0092ndmadd: +$L0102ndmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -272,7 +279,7 @@ $L0092ndmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0092ndmadd + jl $L0102ndmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -288,16 +295,16 @@ $L0092ndmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,DWORD PTR 28[esp] mov DWORD PTR 36[ebx*4+esp],eax - je $L006common_tail + je $L007common_tail mov edi,DWORD PTR [ecx] mov esi,DWORD PTR 8[esp] mov DWORD PTR 12[esp],ecx xor ecx,ecx xor edx,edx mov eax,DWORD PTR [esi] - jmp $L0101stmadd + jmp $L0111stmadd ALIGN 16 -$L007bn_sqr_mont: +$L008bn_sqr_mont: mov DWORD PTR [esp],ebx mov DWORD PTR 12[esp],ecx mov eax,edi @@ -308,7 +315,7 @@ $L007bn_sqr_mont: and ebx,1 inc ecx ALIGN 16 -$L011sqr: +$L012sqr: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -320,7 +327,7 @@ $L011sqr: cmp ecx,DWORD PTR [esp] mov ebx,eax mov DWORD PTR 28[ecx*4+esp],ebp - jl $L011sqr + jl $L012sqr mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -344,7 +351,7 @@ $L011sqr: mov eax,DWORD PTR 4[esi] mov ecx,1 ALIGN 16 -$L0123rdmadd: +$L0133rdmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -363,7 +370,7 @@ $L0123rdmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0123rdmadd + jl $L0133rdmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -379,7 +386,7 @@ $L0123rdmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,ebx mov DWORD PTR 36[ebx*4+esp],eax - je $L006common_tail + je $L007common_tail mov edi,DWORD PTR 4[ecx*4+esi] lea ecx,DWORD PTR 1[ecx] mov eax,edi @@ -391,12 +398,12 @@ $L0123rdmadd: xor ebp,ebp cmp ecx,ebx lea ecx,DWORD PTR 1[ecx] - je $L013sqrlast + je $L014sqrlast mov ebx,edx shr edx,1 and ebx,1 ALIGN 16 -$L014sqradd: +$L015sqradd: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -412,13 +419,13 @@ $L014sqradd: cmp ecx,DWORD PTR [esp] mov DWORD PTR 28[ecx*4+esp],ebp mov ebx,eax - jle $L014sqradd + jle $L015sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 -$L013sqrlast: +$L014sqrlast: mov edi,DWORD PTR 20[esp] mov esi,DWORD PTR 16[esp] imul edi,DWORD PTR 32[esp] @@ -433,9 +440,9 @@ $L013sqrlast: adc edx,0 mov ecx,1 mov eax,DWORD PTR 4[esi] - jmp $L0123rdmadd + jmp $L0133rdmadd ALIGN 16 -$L006common_tail: +$L007common_tail: mov ebp,DWORD PTR 16[esp] mov edi,DWORD PTR 4[esp] lea esi,DWORD PTR 32[esp] @@ -443,13 +450,13 @@ $L006common_tail: mov ecx,ebx xor edx,edx ALIGN 16 -$L015sub: +$L016sub: sbb eax,DWORD PTR [edx*4+ebp] mov DWORD PTR [edx*4+edi],eax dec ecx mov eax,DWORD PTR 4[edx*4+esi] lea edx,DWORD PTR 1[edx] - jge $L015sub + jge $L016sub sbb eax,0 and esi,eax not eax @@ -457,12 +464,12 @@ $L015sub: and ebp,eax or esi,ebp ALIGN 16 -$L016copy: +$L017copy: mov eax,DWORD PTR [ebx*4+esi] mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR 32[ebx*4+esp],ecx dec ebx - jge $L016copy + jge $L017copy mov esp,DWORD PTR 24[esp] mov eax,1 $L000just_leave: -- cgit v1.2.3