author    Shigeki Ohtsu <ohtsu@ohtsu.org>    2016-09-22 21:07:52 +0900
committer Shigeki Ohtsu <ohtsu@ohtsu.org>    2016-09-22 23:06:49 +0900
commit    c17a1fedd8cea4b0c4c265adfcb9048b0918788b (patch)
tree      7f561b9f9b986027d4746585d3718390ab474fef /deps/openssl
parent    6a485bfa7511d8b11402788016881c45b4e6a2e0 (diff)
deps: update openssl asm and asm_obsolete files
Regenerate the asm files with the Makefile, using CC=gcc and ASM=gcc, where the gcc version is 5.4.0. The asm files in the asm_obsolete dir, which support old compilers and assemblers, are regenerated without the CC and ASM envs.

PR-URL: https://github.com/nodejs/node/pull/8714
Reviewed-By: Fedor Indutny <fedor@indutny.com>
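As a rough sketch of the regeneration flow the message describes (the config directory and the make targets below are assumptions for illustration, not taken from this commit):

    cd deps/openssl/config           # assumed location of the regeneration Makefile
    CC=gcc ASM=gcc make asm          # regenerate deps/openssl/asm with gcc 5.4.0 as compiler/assembler
    make asm_obsolete                # regenerate deps/openssl/asm_obsolete without CC/ASM so the
                                     # output suits old compilers and assemblers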
Diffstat (limited to 'deps/openssl')
-rw-r--r--  deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S              |  24
-rw-r--r--  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s                | 155
-rw-r--r--  deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s               | 204
-rw-r--r--  deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s        | 189
-rw-r--r--  deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s               |   4
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s             | 155
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s            | 204
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s     | 189
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s            |   4
-rw-r--r--  deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm           | 185
-rw-r--r--  deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm          | 227
-rw-r--r--  deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm   | 189
-rw-r--r--  deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm          |   4
-rw-r--r--  deps/openssl/asm/x86-elf-gas/bn/x86-mont.s                   | 109
-rw-r--r--  deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s                | 115
-rw-r--r--  deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm              | 109
-rw-r--r--  deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S     |  24
-rw-r--r--  deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s       | 127
-rw-r--r--  deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s      | 134
-rw-r--r--  deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s | 119
-rw-r--r--  deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s      |   4
-rw-r--r--  deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s    | 127
-rw-r--r--  deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s   | 134
-rw-r--r--  deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s | 119
-rw-r--r--  deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s   |   4
-rw-r--r--  deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm  | 154
-rw-r--r--  deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm | 153
-rw-r--r--  deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm | 119
-rw-r--r--  deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm |   4
-rw-r--r--  deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s          | 109
-rw-r--r--  deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s       | 115
-rw-r--r--  deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm     | 109
32 files changed, 2040 insertions(+), 1581 deletions(-)
diff --git a/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
index 9738ed5023..449e7a442e 100644
--- a/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
+++ b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
@@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_6:
- vst1.64 {q14}, [r0,:128] @ next round tweak
-
veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5
.Lxts_enc_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4
.Lxts_enc_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r2, r10
@@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4
.Lxts_dec_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
+ mov r5, r2 @ preserve magic
mov r2, r10
mov r4, r3 @ preserve fp
- mov r5, r2 @ preserve magic
bl AES_decrypt
diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s
index b4fb5fe7e9..21175f570d 100644
--- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s
+++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s
@@ -6,6 +6,8 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@@ -26,29 +28,36 @@ bn_mul_mont:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 2(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -16(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-.Lmul_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+ jmp .Lmul_page_walk_done
+
+.align 16
.Lmul_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc .Lmul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+.Lmul_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -216,19 +225,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
@@ -240,23 +251,29 @@ bn_mul4x_mont:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 4(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -32(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-.Lmul4x_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+ jmp .Lmul4x_page_walk_done
+
.Lmul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lmul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+.Lmul4x_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -625,13 +642,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont
@@ -641,14 +658,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
-.Lsqr8x_enter:
movq %rsp,%rax
+.Lsqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lsqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@@ -661,33 +679,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
- subq %r11,%rsp
- leaq -64(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
- leaq -64(%rsp,%r9,2),%rsp
+ leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lsqr8x_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+ jmp .Lsqr8x_page_walk_done
+
+.align 16
.Lsqr8x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lsqr8x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+.Lsqr8x_page_walk_done:
movq %r9,%r10
negq %r9
@@ -800,30 +827,38 @@ bn_sqr8x_mont:
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
-.Lmulx4x_enter:
movq %rsp,%rax
+.Lmulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lmulx4x_prologue:
shll $3,%r9d
-.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
- leaq -72(%rsp,%r10,1),%rsp
- andq $-128,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ leaq -72(%rsp,%r10,1),%rbp
+ andq $-128,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmulx4x_page_walk
+ jmp .Lmulx4x_page_walk_done
+
+.align 16
.Lmulx4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc .Lmulx4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmulx4x_page_walk
+.Lmulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10
diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
index e4dfd83460..416152560a 100644
--- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
+++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s
@@ -6,6 +6,8 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $7,%r9d
jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -13,10 +15,7 @@ bn_mul_mont_gather5:
.align 16
.Lmul_enter:
- movl %r9d,%r9d
- movq %rsp,%rax
movd 8(%rsp),%xmm5
- leaq .Linc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@@ -24,26 +23,36 @@ bn_mul_mont_gather5:
pushq %r14
pushq %r15
- leaq 2(%r9),%r11
- negq %r11
- leaq -264(%rsp,%r11,8),%rsp
- andq $-1024,%rsp
+ negq %r9
+ movq %rsp,%r11
+ leaq -280(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %rax,8(%rsp,%r9,8)
-.Lmul_body:
- subq %rsp,%rax
- andq $-4096,%rax
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+ jmp .Lmul_page_walk_done
+
.Lmul_page_walk:
- movq (%rsp,%rax,1),%r11
- subq $4096,%rax
-.byte 0x2e
- jnc .Lmul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+.Lmul_page_walk_done:
+
+ leaq .Linc(%rip),%r10
+ movq %rax,8(%rsp,%r9,8)
+.Lmul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@@ -414,18 +423,19 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
+.byte 0x67
+ movq %rsp,%rax
.Lmul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
-.byte 0x67
- movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lmul4x_prologue:
.byte 0x67
shll $3,%r9d
@@ -442,32 +452,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmul4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done
.align 32
.Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lmul4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmul4x_page_walk
+ jmp .Lmul4x_page_walk_done
+
.Lmul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lmul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmul4x_page_walk
+.Lmul4x_page_walk_done:
negq %r9
@@ -1019,17 +1037,18 @@ mul4x_internal:
.type bn_power5,@function
.align 32
bn_power5:
+ movq %rsp,%rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
- movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lpower5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@@ -1044,32 +1063,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwr_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done
.align 32
.Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lpwr_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwr_page_walk
+ jmp .Lpwr_page_walk_done
+
.Lpwr_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lpwr_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwr_page_walk
+.Lpwr_page_walk_done:
movq %r9,%r10
negq %r9
@@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
+.Lfrom_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lfrom_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lfrom_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lfrom_page_walk
+ jmp .Lfrom_page_walk_done
+
.Lfrom_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lfrom_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lfrom_page_walk
+.Lfrom_page_walk_done:
movq %r9,%r10
negq %r9
@@ -2113,14 +2149,15 @@ bn_from_mont8x:
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
-.Lmulx4x_enter:
movq %rsp,%rax
+.Lmulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lmulx4x_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -2137,31 +2174,39 @@ bn_mulx4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmulx4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lmulx4xsp_done
.Lmulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lmulx4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmulx4x_page_walk
+ jmp .Lmulx4x_page_walk_done
+
.Lmulx4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lmulx4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmulx4x_page_walk
+.Lmulx4x_page_walk_done:
@@ -2619,14 +2664,15 @@ mulx4x_internal:
.type bn_powerx5,@function
.align 32
bn_powerx5:
-.Lpowerx5_enter:
movq %rsp,%rax
+.Lpowerx5_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lpowerx5_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -2641,32 +2687,40 @@ bn_powerx5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwrx_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwrx_sp_done
.align 32
.Lpwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lpwrx_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwrx_page_walk
+ jmp .Lpwrx_page_walk_done
+
.Lpwrx_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lpwrx_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwrx_page_walk
+.Lpwrx_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s
index 6d1be614f3..23188cda6e 100644
--- a/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s
@@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
- cmovzq %rcx,%r10
- cmovzq %r12,%r11
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -1461,13 +1462,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1809,16 +1812,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1830,14 +1831,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1848,8 +1849,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -2032,6 +2033,7 @@ ecp_nistz256_point_add:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -2039,7 +2041,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2047,15 +2049,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2213,16 +2215,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -2340,6 +2340,7 @@ ecp_nistz256_point_add_affine:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -2347,7 +2348,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2355,15 +2356,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -2842,16 +2843,14 @@ ecp_nistz256_point_addx:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -2863,14 +2862,14 @@ ecp_nistz256_point_addx:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp)
@@ -2881,8 +2880,8 @@ ecp_nistz256_point_addx:
call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -3065,6 +3064,7 @@ ecp_nistz256_point_addx:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -3072,7 +3072,7 @@ ecp_nistz256_point_addx:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3080,15 +3080,15 @@ ecp_nistz256_point_addx:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
@@ -3242,16 +3242,14 @@ ecp_nistz256_point_add_affinex:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -3369,6 +3367,7 @@ ecp_nistz256_point_add_affinex:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -3376,7 +3375,7 @@ ecp_nistz256_point_add_affinex:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3384,15 +3383,15 @@ ecp_nistz256_point_add_affinex:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s
index 22a031f368..195a148bb9 100644
--- a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s
+++ b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s
@@ -1263,9 +1263,9 @@ _shaext_shortcut:
.align 16
.Loop_shaext:
decq %rdx
- leaq 64(%rsi),%rax
+ leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
+ cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2
diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s
index bca9fbda7f..1d5e63892f 100644
--- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s
+++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s
@@ -6,6 +6,8 @@
.p2align 4
_bn_mul_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
@@ -26,29 +28,36 @@ L$mul_enter:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 2(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -16(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-L$mul_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+ jmp L$mul_page_walk_done
+
+.p2align 4
L$mul_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc L$mul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+L$mul_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+L$mul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -216,19 +225,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
bn_mul4x_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
L$mul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
@@ -240,23 +251,29 @@ L$mul4x_enter:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 4(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -32(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-L$mul4x_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul4x_page_walk
+ jmp L$mul4x_page_walk_done
+
L$mul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$mul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul4x_page_walk
+L$mul4x_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -625,13 +642,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
@@ -641,14 +658,15 @@ L$mul4x_epilogue:
.p2align 5
bn_sqr8x_mont:
-L$sqr8x_enter:
movq %rsp,%rax
+L$sqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$sqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@@ -661,33 +679,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$sqr8x_sp_alt
- subq %r11,%rsp
- leaq -64(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done
.p2align 5
L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
- leaq -64(%rsp,%r9,2),%rsp
+ leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$sqr8x_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$sqr8x_page_walk
+ jmp L$sqr8x_page_walk_done
+
+.p2align 4
L$sqr8x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$sqr8x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$sqr8x_page_walk
+L$sqr8x_page_walk_done:
movq %r9,%r10
negq %r9
@@ -800,30 +827,38 @@ L$sqr8x_epilogue:
.p2align 5
bn_mulx4x_mont:
-L$mulx4x_enter:
movq %rsp,%rax
+L$mulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$mulx4x_prologue:
shll $3,%r9d
-.byte 0x67
xorq %r10,%r10
subq %r9,%r10
movq (%r8),%r8
- leaq -72(%rsp,%r10,1),%rsp
- andq $-128,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ leaq -72(%rsp,%r10,1),%rbp
+ andq $-128,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mulx4x_page_walk
+ jmp L$mulx4x_page_walk_done
+
+.p2align 4
L$mulx4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc L$mulx4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mulx4x_page_walk
+L$mulx4x_page_walk_done:
leaq (%rdx,%r9,1),%r10
diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
index 5d987a1229..878c31ffac 100644
--- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
+++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s
@@ -6,6 +6,8 @@
.p2align 6
_bn_mul_mont_gather5:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $7,%r9d
jnz L$mul_enter
movl _OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -13,10 +15,7 @@ _bn_mul_mont_gather5:
.p2align 4
L$mul_enter:
- movl %r9d,%r9d
- movq %rsp,%rax
movd 8(%rsp),%xmm5
- leaq L$inc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@@ -24,26 +23,36 @@ L$mul_enter:
pushq %r14
pushq %r15
- leaq 2(%r9),%r11
- negq %r11
- leaq -264(%rsp,%r11,8),%rsp
- andq $-1024,%rsp
+ negq %r9
+ movq %rsp,%r11
+ leaq -280(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %rax,8(%rsp,%r9,8)
-L$mul_body:
- subq %rsp,%rax
- andq $-4096,%rax
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+ jmp L$mul_page_walk_done
+
L$mul_page_walk:
- movq (%rsp,%rax,1),%r11
- subq $4096,%rax
-.byte 0x2e
- jnc L$mul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+L$mul_page_walk_done:
+
+ leaq L$inc(%rip),%r10
+ movq %rax,8(%rsp,%r9,8)
+L$mul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@@ -414,18 +423,19 @@ L$mul_epilogue:
.p2align 5
bn_mul4x_mont_gather5:
+.byte 0x67
+ movq %rsp,%rax
L$mul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je L$mulx4x_enter
-.byte 0x67
- movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$mul4x_prologue:
.byte 0x67
shll $3,%r9d
@@ -442,32 +452,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mul4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done
.p2align 5
L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$mul4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+ jmp L$mul4x_page_walk_done
+
L$mul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$mul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+L$mul4x_page_walk_done:
negq %r9
@@ -1019,17 +1037,18 @@ L$inner4x:
.p2align 5
_bn_power5:
+ movq %rsp,%rax
movl _OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je L$powerx5_enter
- movq %rsp,%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$power5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@@ -1044,32 +1063,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwr_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done
.p2align 5
L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$pwr_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+ jmp L$pwr_page_walk_done
+
L$pwr_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$pwr_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+L$pwr_page_walk_done:
movq %r9,%r10
negq %r9
@@ -1980,6 +2007,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
+L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -1994,32 +2022,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$from_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+ jmp L$from_page_walk_done
+
L$from_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$from_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+L$from_page_walk_done:
movq %r9,%r10
negq %r9
@@ -2113,14 +2149,15 @@ L$from_epilogue:
.p2align 5
bn_mulx4x_mont_gather5:
-L$mulx4x_enter:
movq %rsp,%rax
+L$mulx4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$mulx4x_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -2137,31 +2174,39 @@ L$mulx4x_enter:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mulx4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$mulx4xsp_done
L$mulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$mulx4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mulx4x_page_walk
+ jmp L$mulx4x_page_walk_done
+
L$mulx4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$mulx4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mulx4x_page_walk
+L$mulx4x_page_walk_done:
@@ -2619,14 +2664,15 @@ L$mulx4x_inner:
.p2align 5
bn_powerx5:
-L$powerx5_enter:
movq %rsp,%rax
+L$powerx5_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$powerx5_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -2641,32 +2687,40 @@ L$powerx5_enter:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwrx_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$pwrx_sp_done
.p2align 5
L$pwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$pwrx_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwrx_page_walk
+ jmp L$pwrx_page_walk_done
+
L$pwrx_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$pwrx_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwrx_page_walk
+L$pwrx_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
index b90788f453..05b6d5be2c 100644
--- a/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
@@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
- cmovzq %rcx,%r10
- cmovzq %r12,%r11
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -1461,13 +1462,14 @@ L$select_loop_avx2_w7:
.p2align 5
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1475,14 +1477,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1550,13 +1552,14 @@ __ecp_nistz256_subq:
.p2align 5
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1564,14 +1567,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1809,16 +1812,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1830,14 +1831,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1848,8 +1849,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -2032,6 +2033,7 @@ L$add_proceedq:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -2039,7 +2041,7 @@ L$add_proceedq:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2047,15 +2049,15 @@ L$add_proceedq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2213,16 +2215,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -2340,6 +2340,7 @@ _ecp_nistz256_point_add_affine:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -2347,7 +2348,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2355,15 +2356,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2510,14 +2511,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -2605,14 +2606,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -2842,16 +2843,14 @@ L$point_addx:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -2863,14 +2862,14 @@ L$point_addx:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp)
@@ -2881,8 +2880,8 @@ L$point_addx:
call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -3065,6 +3064,7 @@ L$add_proceedx:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -3072,7 +3072,7 @@ L$add_proceedx:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3080,15 +3080,15 @@ L$add_proceedx:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
@@ -3242,16 +3242,14 @@ L$point_add_affinex:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -3369,6 +3367,7 @@ L$point_add_affinex:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -3376,7 +3375,7 @@ L$point_add_affinex:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3384,15 +3383,15 @@ L$point_add_affinex:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s
index c89ffe3df6..1c52e05e39 100644
--- a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s
+++ b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s
@@ -1263,9 +1263,9 @@ _shaext_shortcut:
.p2align 4
L$oop_shaext:
decq %rdx
- leaq 64(%rsi),%rax
+ leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
+ cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2
diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm
index ccc591b7c9..9693ed97e4 100644
--- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm
+++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm
@@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
test r9d,3
jnz $L$mul_enter
cmp r9d,8
@@ -39,29 +41,36 @@ $L$mul_enter::
push r14
push r15
- mov r9d,r9d
- lea r10,QWORD PTR[2+r9]
+ neg r9
mov r11,rsp
- neg r10
- lea rsp,QWORD PTR[r10*8+rsp]
- and rsp,-1024
+ lea r10,QWORD PTR[((-16))+r9*8+rsp]
+ neg r9
+ and r10,-1024
- mov QWORD PTR[8+r9*8+rsp],r11
-$L$mul_body::
- sub r11,rsp
+ sub r11,r10
and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+ jmp $L$mul_page_walk_done
+
+ALIGN 16
$L$mul_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 066h,02eh
- jnc $L$mul_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+$L$mul_page_walk_done::
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul_body::
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@@ -229,13 +238,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
- mov r15,QWORD PTR[rsi]
- mov r14,QWORD PTR[8+rsi]
- mov r13,QWORD PTR[16+rsi]
- mov r12,QWORD PTR[24+rsi]
- mov rbp,QWORD PTR[32+rsi]
- mov rbx,QWORD PTR[40+rsi]
- lea rsp,QWORD PTR[48+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
$L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@@ -257,6 +266,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
$L$mul4x_enter::
and r11d,080100h
cmp r11d,080100h
@@ -268,23 +279,29 @@ $L$mul4x_enter::
push r14
push r15
- mov r9d,r9d
- lea r10,QWORD PTR[4+r9]
+ neg r9
mov r11,rsp
- neg r10
- lea rsp,QWORD PTR[r10*8+rsp]
- and rsp,-1024
+ lea r10,QWORD PTR[((-32))+r9*8+rsp]
+ neg r9
+ and r10,-1024
- mov QWORD PTR[8+r9*8+rsp],r11
-$L$mul4x_body::
- sub r11,rsp
+ sub r11,r10
and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul4x_page_walk
+ jmp $L$mul4x_page_walk_done
+
$L$mul4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$mul4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul4x_page_walk
+$L$mul4x_page_walk_done::
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@@ -653,13 +670,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
- mov r15,QWORD PTR[rsi]
- mov r14,QWORD PTR[8+rsi]
- mov r13,QWORD PTR[16+rsi]
- mov r12,QWORD PTR[24+rsi]
- mov rbp,QWORD PTR[32+rsi]
- mov rbx,QWORD PTR[40+rsi]
- lea rsp,QWORD PTR[48+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@@ -684,14 +701,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp]
-$L$sqr8x_enter::
mov rax,rsp
+$L$sqr8x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$sqr8x_prologue::
mov r10d,r9d
shl r9d,3
@@ -704,33 +722,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp]
+ mov rbp,rsp
mov r8,QWORD PTR[r8]
sub r11,rsi
and r11,4095
cmp r10,r11
jb $L$sqr8x_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done
ALIGN 32
$L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2]
- lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$sqr8x_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$sqr8x_page_walk
+ jmp $L$sqr8x_page_walk_done
+
+ALIGN 16
$L$sqr8x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$sqr8x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$sqr8x_page_walk
+$L$sqr8x_page_walk_done::
mov r10,r9
neg r9
@@ -858,30 +885,38 @@ $L$SEH_begin_bn_mulx4x_mont::
mov r9,QWORD PTR[48+rsp]
-$L$mulx4x_enter::
mov rax,rsp
+$L$mulx4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$mulx4x_prologue::
shl r9d,3
-DB 067h
xor r10,r10
sub r10,r9
mov r8,QWORD PTR[r8]
- lea rsp,QWORD PTR[((-72))+r10*1+rsp]
- and rsp,-128
- mov r11,rax
- sub r11,rsp
+ lea rbp,QWORD PTR[((-72))+r10*1+rsp]
+ and rbp,-128
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mulx4x_page_walk
+ jmp $L$mulx4x_page_walk_done
+
+ALIGN 16
$L$mulx4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 066h,02eh
- jnc $L$mulx4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mulx4x_page_walk
+$L$mulx4x_page_walk_done::
lea r10,QWORD PTR[r9*1+rdx]
@@ -1230,22 +1265,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
- lea rax,QWORD PTR[48+rax]
-
- mov rbx,QWORD PTR[((-8))+rax]
- mov rbp,QWORD PTR[((-16))+rax]
- mov r12,QWORD PTR[((-24))+rax]
- mov r13,QWORD PTR[((-32))+rax]
- mov r14,QWORD PTR[((-40))+rax]
- mov r15,QWORD PTR[((-48))+rax]
- mov QWORD PTR[144+r8],rbx
- mov QWORD PTR[160+r8],rbp
- mov QWORD PTR[216+r8],r12
- mov QWORD PTR[224+r8],r13
- mov QWORD PTR[232+r8],r14
- mov QWORD PTR[240+r8],r15
- jmp $L$common_seh_tail
+ jmp $L$common_pop_regs
mul_handler ENDP
@@ -1273,15 +1294,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_pop_regs
+
mov rax,QWORD PTR[152+r8]
- mov r10d,DWORD PTR[4+r11]
+ mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax]
+$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@@ -1366,11 +1393,13 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
- DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
+ DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
+ALIGN 8
$L$SEH_info_bn_mulx4x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
- DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
+ DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
+ALIGN 8
.xdata ENDS
END
diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
index 3c1a74afb9..6cd4f2d3d8 100644
--- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
+++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm
@@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
test r9d,7
jnz $L$mul_enter
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
@@ -26,10 +28,7 @@ $L$SEH_begin_bn_mul_mont_gather5::
ALIGN 16
$L$mul_enter::
- mov r9d,r9d
- mov rax,rsp
movd xmm5,DWORD PTR[56+rsp]
- lea r10,QWORD PTR[$L$inc]
push rbx
push rbp
push r12
@@ -37,26 +36,36 @@ $L$mul_enter::
push r14
push r15
- lea r11,QWORD PTR[2+r9]
- neg r11
- lea rsp,QWORD PTR[((-264))+r11*8+rsp]
- and rsp,-1024
+ neg r9
+ mov r11,rsp
+ lea r10,QWORD PTR[((-280))+r9*8+rsp]
+ neg r9
+ and r10,-1024
+
- mov QWORD PTR[8+r9*8+rsp],rax
-$L$mul_body::
+ sub r11,r10
+ and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+ jmp $L$mul_page_walk_done
- sub rax,rsp
- and rax,-4096
$L$mul_page_walk::
- mov r11,QWORD PTR[rax*1+rsp]
- sub rax,4096
-DB 02eh
- jnc $L$mul_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+$L$mul_page_walk_done::
+
+ lea r10,QWORD PTR[$L$inc]
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul_body::
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
@@ -442,18 +451,19 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
+DB 067h
+ mov rax,rsp
$L$mul4x_enter::
and r11d,080108h
cmp r11d,080108h
je $L$mulx4x_enter
-DB 067h
- mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$mul4x_prologue::
DB 067h
shl r9d,3
@@ -470,32 +480,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mul4xsp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done
ALIGN 32
$L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$mul4xsp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mul4x_page_walk
+ jmp $L$mul4x_page_walk_done
+
$L$mul4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$mul4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mul4x_page_walk
+$L$mul4x_page_walk_done::
neg r9
@@ -1062,17 +1080,18 @@ $L$SEH_begin_bn_power5::
mov r9,QWORD PTR[48+rsp]
+ mov rax,rsp
mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
and r11d,080108h
cmp r11d,080108h
je $L$powerx5_enter
- mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$power5_prologue::
shl r9d,3
lea r10d,DWORD PTR[r9*2+r9]
@@ -1087,32 +1106,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwr_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done
ALIGN 32
$L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$pwr_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwr_page_walk
+ jmp $L$pwr_page_walk_done
+
$L$pwr_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$pwr_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwr_page_walk
+$L$pwr_page_walk_done::
mov r10,r9
neg r9
@@ -2038,6 +2065,7 @@ DB 067h
push r13
push r14
push r15
+$L$from_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@@ -2052,32 +2080,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$from_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done
ALIGN 32
$L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$from_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$from_page_walk
+ jmp $L$from_page_walk_done
+
$L$from_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$from_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$from_page_walk
+$L$from_page_walk_done::
mov r10,r9
neg r9
@@ -2186,14 +2222,15 @@ $L$SEH_begin_bn_mulx4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
-$L$mulx4x_enter::
mov rax,rsp
+$L$mulx4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$mulx4x_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@@ -2210,31 +2247,39 @@ $L$mulx4x_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mulx4xsp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mulx4xsp_done
$L$mulx4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$mulx4xsp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mulx4x_page_walk
+ jmp $L$mulx4x_page_walk_done
+
$L$mulx4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$mulx4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mulx4x_page_walk
+$L$mulx4x_page_walk_done::
@@ -2707,14 +2752,15 @@ $L$SEH_begin_bn_powerx5::
mov r9,QWORD PTR[48+rsp]
-$L$powerx5_enter::
mov rax,rsp
+$L$powerx5_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$powerx5_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@@ -2729,32 +2775,40 @@ $L$powerx5_enter::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwrx_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwrx_sp_done
ALIGN 32
$L$pwrx_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$pwrx_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwrx_page_walk
+ jmp $L$pwrx_page_walk_done
+
$L$pwrx_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$pwrx_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwrx_page_walk
+$L$pwrx_page_walk_done::
mov r10,r9
neg r9
@@ -3712,9 +3766,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_pop_regs
+
mov rax,QWORD PTR[152+r8]
- mov r10d,DWORD PTR[4+r11]
+ mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
@@ -3726,11 +3785,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
- jmp $L$body_proceed
+ jmp $L$common_pop_regs
$L$body_40::
mov rax,QWORD PTR[40+rax]
-$L$body_proceed::
+$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@@ -3819,32 +3878,32 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$mul_body,imagerel $L$mul_epilogue
+ DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
+ DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8
$L$SEH_info_bn_power5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$power5_body,imagerel $L$power5_epilogue
+ DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8
$L$SEH_info_bn_from_mont8x::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$from_body,imagerel $L$from_epilogue
+ DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8
$L$SEH_info_bn_mulx4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
+ DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
ALIGN 8
$L$SEH_info_bn_powerx5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
+ DD imagerel $L$powerx5_prologue,imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
ALIGN 8
$L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah
diff --git a/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
index f38d253c16..7cf9277bfb 100644
--- a/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
+++ b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
@@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13
mov r8,QWORD PTR[rsi]
+ xor r13,r13
mov r9,QWORD PTR[8+rsi]
add r8,r8
mov r10,QWORD PTR[16+rsi]
@@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10
adc r11,r11
mov rdx,r9
- sbb r13,r13
+ adc r13,0
sub r8,QWORD PTR[rsi]
mov rcx,r10
@@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
- cmovz r10,rcx
- cmovz r11,r12
+ cmovc r8,rax
+ cmovc r9,rdx
+ cmovc r10,rcx
+ cmovc r11,r12
xor r13,r13
add r8,QWORD PTR[rsi]
@@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -1673,13 +1674,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE
+ xor r11,r11
add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx]
mov rax,r12
adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx]
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1687,14 +1689,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -1762,13 +1764,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE
+ xor r11,r11
add r12,r12
adc r13,r13
mov rax,r12
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1776,14 +1779,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -2041,16 +2044,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@@ -2062,14 +2063,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
- por xmm1,xmm0
-DB 102,72,15,110,199
+ movdqu xmm0,XMMWORD PTR[64+rsi]
+ movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
- por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
- por xmm3,xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax
@@ -2080,8 +2081,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4
- pshufd xmm4,xmm3,0b1h
- por xmm4,xmm3
+ pshufd xmm4,xmm1,0b1h
+ por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@@ -2264,6 +2265,7 @@ $L$add_proceedq::
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@@ -2271,7 +2273,7 @@ $L$add_proceedq::
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -2279,15 +2281,15 @@ $L$add_proceedq::
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@@ -2457,16 +2459,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@@ -2584,6 +2584,7 @@ DB 102,72,15,110,199
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@@ -2591,7 +2592,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -2599,15 +2600,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@@ -2757,14 +2758,14 @@ __ecp_nistz256_add_tox PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
+ sbb r11,0
- bt r11,0
- cmovnc r12,rax
- cmovnc r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovnc r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovnc r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -2852,14 +2853,14 @@ __ecp_nistz256_mul_by_2x PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
+ sbb r11,0
- bt r11,0
- cmovnc r12,rax
- cmovnc r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovnc r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovnc r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -3109,16 +3110,14 @@ $L$point_addx::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@@ -3130,14 +3129,14 @@ $L$point_addx::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
- por xmm1,xmm0
-DB 102,72,15,110,199
+ movdqu xmm0,XMMWORD PTR[64+rsi]
+ movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
- por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
- por xmm3,xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-128))+rsi]
mov QWORD PTR[((544+0))+rsp],rdx
@@ -3148,8 +3147,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montx
pcmpeqd xmm5,xmm4
- pshufd xmm4,xmm3,0b1h
- por xmm4,xmm3
+ pshufd xmm4,xmm1,0b1h
+ por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@@ -3332,6 +3331,7 @@ $L$add_proceedx::
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@@ -3339,7 +3339,7 @@ $L$add_proceedx::
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -3347,15 +3347,15 @@ $L$add_proceedx::
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx
@@ -3521,16 +3521,14 @@ $L$point_add_affinex::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@@ -3648,6 +3646,7 @@ DB 102,72,15,110,199
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@@ -3655,7 +3654,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -3663,15 +3662,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subx
diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm
index 295a2c06ba..24df468123 100644
--- a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm
+++ b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm
@@ -1291,9 +1291,9 @@ DB 102,15,56,0,251
ALIGN 16
$L$oop_shaext::
dec rdx
- lea rax,QWORD PTR[64+rsi]
+ lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4
- cmovne rsi,rax
+ cmovne rsi,r8
movdqa xmm8,xmm0
DB 15,56,201,229
movdqa xmm2,xmm0
diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s
index 2f7211d92e..b683577231 100644
--- a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s
+++ b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s
@@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+ jmp .L002page_walk_done
+.align 16
.L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc .L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
+ movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
- jnc .L002non_sse2
+ jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
-.L0031st:
+.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl .L0031st
+ jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-.L004outer:
+.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
-.L005inner:
+.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz .L005inner
+ jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle .L004outer
+ jle .L005outer
emms
- jmp .L006common_tail
+ jmp .L007common_tail
.align 16
-.L002non_sse2:
+.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz .L007bn_sqr_mont
+ jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
-.L008mull:
+.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L008mull
+ jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp .L0092ndmadd
+ jmp .L0102ndmadd
.align 16
-.L0101stmadd:
+.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L0101stmadd
+ jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
-.L0092ndmadd:
+.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0092ndmadd
+ jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp .L0101stmadd
+ jmp .L0111stmadd
.align 16
-.L007bn_sqr_mont:
+.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
-.L011sqr:
+.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl .L011sqr
+ jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
-.L0123rdmadd:
+.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0123rdmadd
+ jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je .L013sqrlast
+ je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
-.L014sqradd:
+.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle .L014sqradd
+ jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-.L013sqrlast:
+.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp .L0123rdmadd
+ jmp .L0133rdmadd
.align 16
-.L006common_tail:
+.L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
-.L015sub:
+.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge .L015sub
+ jge .L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
-.L016copy:
+.L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge .L016copy
+ jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
index accec0e519..7bc58d24e0 100644
--- a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
+++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s
@@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+ jmp L002page_walk_done
+.align 4,0x90
L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
- call L002PIC_me_up
-L002PIC_me_up:
+ movl %edx,24(%esp)
+ call L003PIC_me_up
+L003PIC_me_up:
popl %eax
- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
+ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
- jnc L003non_sse2
+ jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
-L0041st:
+L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl L0041st
+ jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-L005outer:
+L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
-L006inner:
+L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz L006inner
+ jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle L005outer
+ jle L006outer
emms
- jmp L007common_tail
+ jmp L008common_tail
.align 4,0x90
-L003non_sse2:
+L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz L008bn_sqr_mont
+ jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
-L009mull:
+L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L009mull
+ jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp L0102ndmadd
+ jmp L0112ndmadd
.align 4,0x90
-L0111stmadd:
+L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L0111stmadd
+ jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
-L0102ndmadd:
+L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0102ndmadd
+ jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je L007common_tail
+ je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp L0111stmadd
+ jmp L0121stmadd
.align 4,0x90
-L008bn_sqr_mont:
+L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
-L012sqr:
+L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl L012sqr
+ jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
-L0133rdmadd:
+L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0133rdmadd
+ jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je L007common_tail
+ je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je L014sqrlast
+ je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
-L015sqradd:
+L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle L015sqradd
+ jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-L014sqrlast:
+L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp L0133rdmadd
+ jmp L0143rdmadd
.align 4,0x90
-L007common_tail:
+L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
-L016sub:
+L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge L016sub
+ jge L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
-L017copy:
+L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge L017copy
+ jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm
index 4987f6fe91..f026dae738 100644
--- a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm
+++ b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm
@@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave
lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp]
- mov ebp,esp
add edi,2
neg edi
- lea esp,DWORD PTR [edi*4+esp-32]
+ lea ebp,DWORD PTR [edi*4+esp-32]
neg edi
- mov eax,esp
+ mov eax,ebp
sub eax,edx
and eax,2047
- sub esp,eax
- xor edx,esp
+ sub ebp,eax
+ xor edx,ebp
and edx,2048
xor edx,2048
- sub esp,edx
- and esp,-64
- mov eax,ebp
- sub eax,esp
+ sub ebp,edx
+ and ebp,-64
+ mov eax,esp
+ sub eax,ebp
and eax,-4096
+ mov edx,esp
+ lea esp,DWORD PTR [eax*1+ebp]
+ mov eax,DWORD PTR [esp]
+ cmp esp,ebp
+ ja $L001page_walk
+ jmp $L002page_walk_done
+ALIGN 16
$L001page_walk:
- mov edx,DWORD PTR [eax*1+esp]
- sub eax,4096
-DB 46
- jnc $L001page_walk
+ lea esp,DWORD PTR [esp-4096]
+ mov eax,DWORD PTR [esp]
+ cmp esp,ebp
+ ja $L001page_walk
+$L002page_walk_done:
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
- mov edx,DWORD PTR 12[esi]
+ mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx
- mov DWORD PTR 16[esp],edx
+ mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3]
- mov DWORD PTR 24[esp],ebp
+ mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
- jnc $L002non_sse2
+ jnc $L003non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@@ -92,7 +99,7 @@ DB 46
psrlq mm3,32
inc ecx
ALIGN 16
-$L0031st:
+$L0041st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
- jl $L0031st
+ jl $L0041st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
-$L004outer:
+$L005outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6
inc ecx
dec ebx
-$L005inner:
+$L006inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
- jnz $L005inner
+ jnz $L006inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
- jle $L004outer
+ jle $L005outer
emms
- jmp $L006common_tail
+ jmp $L007common_tail
ALIGN 16
-$L002non_sse2:
+$L003non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
- jz $L007bn_sqr_mont
+ jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
-$L008mull:
+$L009mull:
mov ebp,edx
mul edi
add ebp,eax
@@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L008mull
+ jl $L009mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
- jmp $L0092ndmadd
+ jmp $L0102ndmadd
ALIGN 16
-$L0101stmadd:
+$L0111stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L0101stmadd
+ jl $L0111stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0
mov ecx,1
ALIGN 16
-$L0092ndmadd:
+$L0102ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
- jl $L0092ndmadd
+ jl $L0102ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
- je $L006common_tail
+ je $L007common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
- jmp $L0101stmadd
+ jmp $L0111stmadd
ALIGN 16
-$L007bn_sqr_mont:
+$L008bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
-$L011sqr:
+$L012sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L011sqr
+ jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
-$L0123rdmadd:
+$L0133rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
- jl $L0123rdmadd
+ jl $L0133rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
- je $L006common_tail
+ je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
- je $L013sqrlast
+ je $L014sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
-$L014sqradd:
+$L015sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
- jle $L014sqradd
+ jle $L015sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
-$L013sqrlast:
+$L014sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
- jmp $L0123rdmadd
+ jmp $L0133rdmadd
ALIGN 16
-$L006common_tail:
+$L007common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
-$L015sub:
+$L016sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
- jge $L015sub
+ jge $L016sub
sbb eax,0
and esi,eax
not eax
@@ -457,12 +464,12 @@ $L015sub:
and ebp,eax
or esi,ebp
ALIGN 16
-$L016copy:
+$L017copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
- jge $L016copy
+ jge $L017copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave:
diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S
index 9738ed5023..449e7a442e 100644
--- a/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S
+++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S
@@ -1816,8 +1816,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_6:
- vst1.64 {q14}, [r0,:128] @ next round tweak
-
veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1853,8 +1851,6 @@ bsaes_xts_encrypt:
.align 5
.Lxts_enc_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1883,8 +1879,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1910,8 +1904,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1936,8 +1928,6 @@ bsaes_xts_encrypt:
b .Lxts_enc_done
.align 4
.Lxts_enc_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1960,7 +1950,7 @@ bsaes_xts_encrypt:
.align 4
.Lxts_enc_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r2, r10
@@ -2346,8 +2336,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2376,8 +2364,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2403,8 +2389,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2429,8 +2413,6 @@ bsaes_xts_decrypt:
b .Lxts_dec_done
.align 4
.Lxts_dec_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2453,12 +2435,12 @@ bsaes_xts_decrypt:
.align 4
.Lxts_dec_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
+ mov r5, r2 @ preserve magic
mov r2, r10
mov r4, r3 @ preserve fp
- mov r5, r2 @ preserve magic
bl AES_decrypt
diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s
index 0d36e3d473..865c2ef5cb 100644
--- a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s
+++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s
@@ -6,6 +6,8 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@@ -25,29 +27,36 @@ bn_mul_mont:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 2(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -16(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-.Lmul_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+ jmp .Lmul_page_walk_done
+
+.align 16
.Lmul_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc .Lmul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+.Lmul_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -215,19 +224,21 @@ bn_mul_mont:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
.Lmul4x_enter:
pushq %rbx
pushq %rbp
@@ -236,23 +247,29 @@ bn_mul4x_mont:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 4(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -32(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-.Lmul4x_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+ jmp .Lmul4x_page_walk_done
+
.Lmul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lmul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul4x_page_walk
+.Lmul4x_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -621,13 +638,13 @@ bn_mul4x_mont:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.size bn_mul4x_mont,.-bn_mul4x_mont
@@ -636,14 +653,15 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
-.Lsqr8x_enter:
movq %rsp,%rax
+.Lsqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lsqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@@ -656,33 +674,42 @@ bn_sqr8x_mont:
leaq -64(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lsqr8x_sp_alt
- subq %r11,%rsp
- leaq -64(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -64(%rbp,%r9,2),%rbp
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
- leaq -64(%rsp,%r9,2),%rsp
+ leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lsqr8x_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+ jmp .Lsqr8x_page_walk_done
+
+.align 16
.Lsqr8x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lsqr8x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lsqr8x_page_walk
+.Lsqr8x_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s
index a503f6bd8d..74ac8ee159 100644
--- a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s
+++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s
@@ -6,16 +6,15 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $7,%r9d
jnz .Lmul_enter
jmp .Lmul4x_enter
.align 16
.Lmul_enter:
- movl %r9d,%r9d
- movq %rsp,%rax
movd 8(%rsp),%xmm5
- leaq .Linc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@@ -23,26 +22,36 @@ bn_mul_mont_gather5:
pushq %r14
pushq %r15
- leaq 2(%r9),%r11
- negq %r11
- leaq -264(%rsp,%r11,8),%rsp
- andq $-1024,%rsp
+ negq %r9
+ movq %rsp,%r11
+ leaq -280(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
+
- movq %rax,8(%rsp,%r9,8)
-.Lmul_body:
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+ jmp .Lmul_page_walk_done
- subq %rsp,%rax
- andq $-4096,%rax
.Lmul_page_walk:
- movq (%rsp,%rax,1),%r11
- subq $4096,%rax
-.byte 0x2e
- jnc .Lmul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja .Lmul_page_walk
+.Lmul_page_walk_done:
+
+ leaq .Linc(%rip),%r10
+ movq %rax,8(%rsp,%r9,8)
+.Lmul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@@ -413,15 +422,16 @@ bn_mul_mont_gather5:
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
-.Lmul4x_enter:
.byte 0x67
movq %rsp,%rax
+.Lmul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+.Lmul4x_prologue:
.byte 0x67
shll $3,%r9d
@@ -438,32 +448,40 @@ bn_mul4x_mont_gather5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmul4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done
.align 32
.Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lmul4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmul4x_page_walk
+ jmp .Lmul4x_page_walk_done
+
.Lmul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lmul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lmul4x_page_walk
+.Lmul4x_page_walk_done:
negq %r9
@@ -1022,6 +1040,7 @@ bn_power5:
pushq %r13
pushq %r14
pushq %r15
+.Lpower5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@@ -1036,32 +1055,40 @@ bn_power5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwr_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done
.align 32
.Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lpwr_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwr_page_walk
+ jmp .Lpwr_page_walk_done
+
.Lpwr_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lpwr_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lpwr_page_walk
+.Lpwr_page_walk_done:
movq %r9,%r10
negq %r9
@@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
+.Lfrom_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lfrom_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
.Lfrom_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lfrom_page_walk
+ jmp .Lfrom_page_walk_done
+
.Lfrom_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc .Lfrom_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja .Lfrom_page_walk
+.Lfrom_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s
index 7876e38299..c7bc6f2636 100644
--- a/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s
@@ -27,6 +27,7 @@ ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -37,7 +38,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -45,14 +46,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -149,12 +150,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
- cmovzq %rcx,%r10
- cmovzq %r12,%r11
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@@ -171,14 +172,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -217,14 +218,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -993,13 +994,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1333,16 +1336,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1354,14 +1355,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1372,8 +1373,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -1556,6 +1557,7 @@ ecp_nistz256_point_add:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -1563,7 +1565,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1571,15 +1573,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -1733,16 +1735,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -1860,6 +1860,7 @@ ecp_nistz256_point_add_affine:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -1867,7 +1868,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1875,15 +1876,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s
index 38b7df1970..d2fbc15044 100644
--- a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s
+++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s
@@ -1255,9 +1255,9 @@ _shaext_shortcut:
.align 16
.Loop_shaext:
decq %rdx
- leaq 64(%rsi),%rax
+ leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
+ cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2
diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s
index a5b25ebb4b..7c59d9b730 100644
--- a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s
+++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s
@@ -6,6 +6,8 @@
.p2align 4
_bn_mul_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $3,%r9d
jnz L$mul_enter
cmpl $8,%r9d
@@ -25,29 +27,36 @@ L$mul_enter:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 2(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -16(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-L$mul_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+ jmp L$mul_page_walk_done
+
+.p2align 4
L$mul_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x66,0x2e
- jnc L$mul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+L$mul_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+L$mul_body:
movq %rdx,%r12
movq (%r8),%r8
movq (%r12),%rbx
@@ -215,19 +224,21 @@ L$copy:
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
L$mul_epilogue:
.byte 0xf3,0xc3
.p2align 4
bn_mul4x_mont:
+ movl %r9d,%r9d
+ movq %rsp,%rax
L$mul4x_enter:
pushq %rbx
pushq %rbp
@@ -236,23 +247,29 @@ L$mul4x_enter:
pushq %r14
pushq %r15
- movl %r9d,%r9d
- leaq 4(%r9),%r10
+ negq %r9
movq %rsp,%r11
- negq %r10
- leaq (%rsp,%r10,8),%rsp
- andq $-1024,%rsp
+ leaq -32(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
- movq %r11,8(%rsp,%r9,8)
-L$mul4x_body:
- subq %rsp,%r11
+ subq %r10,%r11
andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul4x_page_walk
+ jmp L$mul4x_page_walk_done
+
L$mul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$mul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul4x_page_walk
+L$mul4x_page_walk_done:
+ movq %rax,8(%rsp,%r9,8)
+L$mul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
movq (%r8),%r8
@@ -621,13 +638,13 @@ L$copy4x:
movdqu %xmm2,16(%rdi,%r14,1)
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+ movq -40(%rsi),%r14
+ movq -32(%rsi),%r13
+ movq -24(%rsi),%r12
+ movq -16(%rsi),%rbp
+ movq -8(%rsi),%rbx
+ leaq (%rsi),%rsp
L$mul4x_epilogue:
.byte 0xf3,0xc3
@@ -636,14 +653,15 @@ L$mul4x_epilogue:
.p2align 5
bn_sqr8x_mont:
-L$sqr8x_enter:
movq %rsp,%rax
+L$sqr8x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$sqr8x_prologue:
movl %r9d,%r10d
shll $3,%r9d
@@ -656,33 +674,42 @@ L$sqr8x_enter:
leaq -64(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
movq (%r8),%r8
subq %rsi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$sqr8x_sp_alt
- subq %r11,%rsp
- leaq -64(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -64(%rbp,%r9,2),%rbp
jmp L$sqr8x_sp_done
.p2align 5
L$sqr8x_sp_alt:
leaq 4096-64(,%r9,2),%r10
- leaq -64(%rsp,%r9,2),%rsp
+ leaq -64(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$sqr8x_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$sqr8x_page_walk
+ jmp L$sqr8x_page_walk_done
+
+.p2align 4
L$sqr8x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$sqr8x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$sqr8x_page_walk
+L$sqr8x_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s
index 8bb7c34c35..527abf5711 100644
--- a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s
+++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s
@@ -6,16 +6,15 @@
.p2align 6
_bn_mul_mont_gather5:
+ movl %r9d,%r9d
+ movq %rsp,%rax
testl $7,%r9d
jnz L$mul_enter
jmp L$mul4x_enter
.p2align 4
L$mul_enter:
- movl %r9d,%r9d
- movq %rsp,%rax
movd 8(%rsp),%xmm5
- leaq L$inc(%rip),%r10
pushq %rbx
pushq %rbp
pushq %r12
@@ -23,26 +22,36 @@ L$mul_enter:
pushq %r14
pushq %r15
- leaq 2(%r9),%r11
- negq %r11
- leaq -264(%rsp,%r11,8),%rsp
- andq $-1024,%rsp
+ negq %r9
+ movq %rsp,%r11
+ leaq -280(%rsp,%r9,8),%r10
+ negq %r9
+ andq $-1024,%r10
+
- movq %rax,8(%rsp,%r9,8)
-L$mul_body:
+ subq %r10,%r11
+ andq $-4096,%r11
+ leaq (%r10,%r11,1),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+ jmp L$mul_page_walk_done
- subq %rsp,%rax
- andq $-4096,%rax
L$mul_page_walk:
- movq (%rsp,%rax,1),%r11
- subq $4096,%rax
-.byte 0x2e
- jnc L$mul_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r11
+ cmpq %r10,%rsp
+ ja L$mul_page_walk
+L$mul_page_walk_done:
+
+ leaq L$inc(%rip),%r10
+ movq %rax,8(%rsp,%r9,8)
+L$mul_body:
leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
@@ -413,15 +422,16 @@ L$mul_epilogue:
.p2align 5
bn_mul4x_mont_gather5:
-L$mul4x_enter:
.byte 0x67
movq %rsp,%rax
+L$mul4x_enter:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+L$mul4x_prologue:
.byte 0x67
shll $3,%r9d
@@ -438,32 +448,40 @@ L$mul4x_enter:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$mul4xsp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$mul4xsp_done
.p2align 5
L$mul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$mul4xsp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+ jmp L$mul4x_page_walk_done
+
L$mul4x_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$mul4x_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$mul4x_page_walk
+L$mul4x_page_walk_done:
negq %r9
@@ -1022,6 +1040,7 @@ _bn_power5:
pushq %r13
pushq %r14
pushq %r15
+L$power5_prologue:
shll $3,%r9d
leal (%r9,%r9,2),%r10d
@@ -1036,32 +1055,40 @@ _bn_power5:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$pwr_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$pwr_sp_done
.p2align 5
L$pwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$pwr_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+ jmp L$pwr_page_walk_done
+
L$pwr_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$pwr_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$pwr_page_walk
+L$pwr_page_walk_done:
movq %r9,%r10
negq %r9
@@ -1972,6 +1999,7 @@ bn_from_mont8x:
pushq %r13
pushq %r14
pushq %r15
+L$from_prologue:
shll $3,%r9d
leaq (%r9,%r9,2),%r10
@@ -1986,32 +2014,40 @@ bn_from_mont8x:
leaq -320(%rsp,%r9,2),%r11
+ movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb L$from_sp_alt
- subq %r11,%rsp
- leaq -320(%rsp,%r9,2),%rsp
+ subq %r11,%rbp
+ leaq -320(%rbp,%r9,2),%rbp
jmp L$from_sp_done
.p2align 5
L$from_sp_alt:
leaq 4096-320(,%r9,2),%r10
- leaq -320(%rsp,%r9,2),%rsp
+ leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
- subq %r11,%rsp
+ subq %r11,%rbp
L$from_sp_done:
- andq $-64,%rsp
- movq %rax,%r11
- subq %rsp,%r11
+ andq $-64,%rbp
+ movq %rsp,%r11
+ subq %rbp,%r11
andq $-4096,%r11
+ leaq (%r11,%rbp,1),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+ jmp L$from_page_walk_done
+
L$from_page_walk:
- movq (%rsp,%r11,1),%r10
- subq $4096,%r11
-.byte 0x2e
- jnc L$from_page_walk
+ leaq -4096(%rsp),%rsp
+ movq (%rsp),%r10
+ cmpq %rbp,%rsp
+ ja L$from_page_walk
+L$from_page_walk_done:
movq %r9,%r10
negq %r9
diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
index 30456b900f..62e8d00ccd 100644
--- a/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s
@@ -27,6 +27,7 @@ _ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -37,7 +38,7 @@ _ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -45,14 +46,14 @@ _ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -149,12 +150,12 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
- cmovzq %rcx,%r10
- cmovzq %r12,%r11
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@@ -171,14 +172,14 @@ _ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq L$poly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -217,14 +218,14 @@ _ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -993,13 +994,14 @@ _ecp_nistz256_avx2_select_w7:
.p2align 5
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1007,14 +1009,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1082,13 +1084,14 @@ __ecp_nistz256_subq:
.p2align 5
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1096,14 +1099,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1333,16 +1336,14 @@ _ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1354,14 +1355,14 @@ _ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1372,8 +1373,8 @@ _ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -1556,6 +1557,7 @@ L$add_proceedq:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -1563,7 +1565,7 @@ L$add_proceedq:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1571,15 +1573,15 @@ L$add_proceedq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -1733,16 +1735,14 @@ _ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -1860,6 +1860,7 @@ _ecp_nistz256_point_add_affine:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -1867,7 +1868,7 @@ _ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1875,15 +1876,15 @@ _ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s
index 671034cdaf..47c5f633cd 100644
--- a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s
+++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s
@@ -1255,9 +1255,9 @@ _shaext_shortcut:
.p2align 4
L$oop_shaext:
decq %rdx
- leaq 64(%rsi),%rax
+ leaq 64(%rsi),%r8
paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
+ cmovneq %r8,%rsi
movdqa %xmm0,%xmm8
.byte 15,56,201,229
movdqa %xmm0,%xmm2
diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm
index ed588a016b..2b46716247 100644
--- a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm
+++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm
@@ -19,6 +19,8 @@ $L$SEH_begin_bn_mul_mont::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
test r9d,3
jnz $L$mul_enter
cmp r9d,8
@@ -38,29 +40,36 @@ $L$mul_enter::
push r14
push r15
- mov r9d,r9d
- lea r10,QWORD PTR[2+r9]
+ neg r9
mov r11,rsp
- neg r10
- lea rsp,QWORD PTR[r10*8+rsp]
- and rsp,-1024
+ lea r10,QWORD PTR[((-16))+r9*8+rsp]
+ neg r9
+ and r10,-1024
- mov QWORD PTR[8+r9*8+rsp],r11
-$L$mul_body::
- sub r11,rsp
+ sub r11,r10
and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+ jmp $L$mul_page_walk_done
+
+ALIGN 16
$L$mul_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 066h,02eh
- jnc $L$mul_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+$L$mul_page_walk_done::
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul_body::
mov r12,rdx
mov r8,QWORD PTR[r8]
mov rbx,QWORD PTR[r12]
@@ -228,13 +237,13 @@ $L$copy::
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
- mov r15,QWORD PTR[rsi]
- mov r14,QWORD PTR[8+rsi]
- mov r13,QWORD PTR[16+rsi]
- mov r12,QWORD PTR[24+rsi]
- mov rbp,QWORD PTR[32+rsi]
- mov rbx,QWORD PTR[40+rsi]
- lea rsp,QWORD PTR[48+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
$L$mul_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@@ -256,6 +265,8 @@ $L$SEH_begin_bn_mul4x_mont::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
$L$mul4x_enter::
push rbx
push rbp
@@ -264,23 +275,29 @@ $L$mul4x_enter::
push r14
push r15
- mov r9d,r9d
- lea r10,QWORD PTR[4+r9]
+ neg r9
mov r11,rsp
- neg r10
- lea rsp,QWORD PTR[r10*8+rsp]
- and rsp,-1024
+ lea r10,QWORD PTR[((-32))+r9*8+rsp]
+ neg r9
+ and r10,-1024
- mov QWORD PTR[8+r9*8+rsp],r11
-$L$mul4x_body::
- sub r11,rsp
+ sub r11,r10
and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul4x_page_walk
+ jmp $L$mul4x_page_walk_done
+
$L$mul4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$mul4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul4x_page_walk
+$L$mul4x_page_walk_done::
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul4x_body::
mov QWORD PTR[16+r9*8+rsp],rdi
mov r12,rdx
mov r8,QWORD PTR[r8]
@@ -649,13 +666,13 @@ $L$copy4x::
movdqu XMMWORD PTR[16+r14*1+rdi],xmm2
mov rsi,QWORD PTR[8+r9*8+rsp]
mov rax,1
- mov r15,QWORD PTR[rsi]
- mov r14,QWORD PTR[8+rsi]
- mov r13,QWORD PTR[16+rsi]
- mov r12,QWORD PTR[24+rsi]
- mov rbp,QWORD PTR[32+rsi]
- mov rbx,QWORD PTR[40+rsi]
- lea rsp,QWORD PTR[48+rsi]
+ mov r15,QWORD PTR[((-48))+rsi]
+ mov r14,QWORD PTR[((-40))+rsi]
+ mov r13,QWORD PTR[((-32))+rsi]
+ mov r12,QWORD PTR[((-24))+rsi]
+ mov rbp,QWORD PTR[((-16))+rsi]
+ mov rbx,QWORD PTR[((-8))+rsi]
+ lea rsp,QWORD PTR[rsi]
$L$mul4x_epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
@@ -679,14 +696,15 @@ $L$SEH_begin_bn_sqr8x_mont::
mov r9,QWORD PTR[48+rsp]
-$L$sqr8x_enter::
mov rax,rsp
+$L$sqr8x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$sqr8x_prologue::
mov r10d,r9d
shl r9d,3
@@ -699,33 +717,42 @@ $L$sqr8x_enter::
lea r11,QWORD PTR[((-64))+r9*2+rsp]
+ mov rbp,rsp
mov r8,QWORD PTR[r8]
sub r11,rsi
and r11,4095
cmp r10,r11
jb $L$sqr8x_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-64))+r9*2+rbp]
jmp $L$sqr8x_sp_done
ALIGN 32
$L$sqr8x_sp_alt::
lea r10,QWORD PTR[((4096-64))+r9*2]
- lea rsp,QWORD PTR[((-64))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-64))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$sqr8x_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$sqr8x_page_walk
+ jmp $L$sqr8x_page_walk_done
+
+ALIGN 16
$L$sqr8x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$sqr8x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$sqr8x_page_walk
+$L$sqr8x_page_walk_done::
mov r10,r9
neg r9
@@ -860,22 +887,8 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
- lea rax,QWORD PTR[48+rax]
- mov rbx,QWORD PTR[((-8))+rax]
- mov rbp,QWORD PTR[((-16))+rax]
- mov r12,QWORD PTR[((-24))+rax]
- mov r13,QWORD PTR[((-32))+rax]
- mov r14,QWORD PTR[((-40))+rax]
- mov r15,QWORD PTR[((-48))+rax]
- mov QWORD PTR[144+r8],rbx
- mov QWORD PTR[160+r8],rbp
- mov QWORD PTR[216+r8],r12
- mov QWORD PTR[224+r8],r13
- mov QWORD PTR[232+r8],r14
- mov QWORD PTR[240+r8],r15
-
- jmp $L$common_seh_tail
+ jmp $L$common_pop_regs
mul_handler ENDP
@@ -903,15 +916,21 @@ sqr_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_pop_regs
+
mov rax,QWORD PTR[152+r8]
- mov r10d,DWORD PTR[4+r11]
+ mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
mov rax,QWORD PTR[40+rax]
+$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@@ -993,7 +1012,8 @@ DB 9,0,0,0
$L$SEH_info_bn_sqr8x_mont::
DB 9,0,0,0
DD imagerel sqr_handler
- DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
+ DD imagerel $L$sqr8x_prologue,imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue
+ALIGN 8
.xdata ENDS
END
diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm
index fb3c27a0ff..89f45a4915 100644
--- a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm
+++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm
@@ -19,16 +19,15 @@ $L$SEH_begin_bn_mul_mont_gather5::
mov r9,QWORD PTR[48+rsp]
+ mov r9d,r9d
+ mov rax,rsp
test r9d,7
jnz $L$mul_enter
jmp $L$mul4x_enter
ALIGN 16
$L$mul_enter::
- mov r9d,r9d
- mov rax,rsp
movd xmm5,DWORD PTR[56+rsp]
- lea r10,QWORD PTR[$L$inc]
push rbx
push rbp
push r12
@@ -36,26 +35,36 @@ $L$mul_enter::
push r14
push r15
- lea r11,QWORD PTR[2+r9]
- neg r11
- lea rsp,QWORD PTR[((-264))+r11*8+rsp]
- and rsp,-1024
+ neg r9
+ mov r11,rsp
+ lea r10,QWORD PTR[((-280))+r9*8+rsp]
+ neg r9
+ and r10,-1024
+
- mov QWORD PTR[8+r9*8+rsp],rax
-$L$mul_body::
+ sub r11,r10
+ and r11,-4096
+ lea rsp,QWORD PTR[r11*1+r10]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+ jmp $L$mul_page_walk_done
- sub rax,rsp
- and rax,-4096
$L$mul_page_walk::
- mov r11,QWORD PTR[rax*1+rsp]
- sub rax,4096
-DB 02eh
- jnc $L$mul_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r11,QWORD PTR[rsp]
+ cmp rsp,r10
+ ja $L$mul_page_walk
+$L$mul_page_walk_done::
+
+ lea r10,QWORD PTR[$L$inc]
+ mov QWORD PTR[8+r9*8+rsp],rax
+$L$mul_body::
lea r12,QWORD PTR[128+rdx]
movdqa xmm0,XMMWORD PTR[r10]
@@ -441,15 +450,16 @@ $L$SEH_begin_bn_mul4x_mont_gather5::
mov r9,QWORD PTR[48+rsp]
-$L$mul4x_enter::
DB 067h
mov rax,rsp
+$L$mul4x_enter::
push rbx
push rbp
push r12
push r13
push r14
push r15
+$L$mul4x_prologue::
DB 067h
shl r9d,3
@@ -466,32 +476,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$mul4xsp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$mul4xsp_done
ALIGN 32
$L$mul4xsp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$mul4xsp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mul4x_page_walk
+ jmp $L$mul4x_page_walk_done
+
$L$mul4x_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$mul4x_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$mul4x_page_walk
+$L$mul4x_page_walk_done::
neg r9
@@ -1065,6 +1083,7 @@ $L$SEH_begin_bn_power5::
push r13
push r14
push r15
+$L$power5_prologue::
shl r9d,3
lea r10d,DWORD PTR[r9*2+r9]
@@ -1079,32 +1098,40 @@ $L$SEH_begin_bn_power5::
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$pwr_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$pwr_sp_done
ALIGN 32
$L$pwr_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$pwr_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwr_page_walk
+ jmp $L$pwr_page_walk_done
+
$L$pwr_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$pwr_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$pwr_page_walk
+$L$pwr_page_walk_done::
mov r10,r9
neg r9
@@ -2030,6 +2057,7 @@ DB 067h
push r13
push r14
push r15
+$L$from_prologue::
shl r9d,3
lea r10,QWORD PTR[r9*2+r9]
@@ -2044,32 +2072,40 @@ DB 067h
lea r11,QWORD PTR[((-320))+r9*2+rsp]
+ mov rbp,rsp
sub r11,rdi
and r11,4095
cmp r10,r11
jb $L$from_sp_alt
- sub rsp,r11
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ sub rbp,r11
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
jmp $L$from_sp_done
ALIGN 32
$L$from_sp_alt::
lea r10,QWORD PTR[((4096-320))+r9*2]
- lea rsp,QWORD PTR[((-320))+r9*2+rsp]
+ lea rbp,QWORD PTR[((-320))+r9*2+rbp]
sub r11,r10
mov r10,0
cmovc r11,r10
- sub rsp,r11
+ sub rbp,r11
$L$from_sp_done::
- and rsp,-64
- mov r11,rax
- sub r11,rsp
+ and rbp,-64
+ mov r11,rsp
+ sub r11,rbp
and r11,-4096
+ lea rsp,QWORD PTR[rbp*1+r11]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$from_page_walk
+ jmp $L$from_page_walk_done
+
$L$from_page_walk::
- mov r10,QWORD PTR[r11*1+rsp]
- sub r11,4096
-DB 02eh
- jnc $L$from_page_walk
+ lea rsp,QWORD PTR[((-4096))+rsp]
+ mov r10,QWORD PTR[rsp]
+ cmp rsp,rbp
+ ja $L$from_page_walk
+$L$from_page_walk_done::
mov r10,r9
neg r9
@@ -2383,9 +2419,14 @@ mul_handler PROC PRIVATE
cmp rbx,r10
jb $L$common_seh_tail
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_pop_regs
+
mov rax,QWORD PTR[152+r8]
- mov r10d,DWORD PTR[4+r11]
+ mov r10d,DWORD PTR[8+r11]
lea r10,QWORD PTR[r10*1+rsi]
cmp rbx,r10
jae $L$common_seh_tail
@@ -2397,11 +2438,11 @@ mul_handler PROC PRIVATE
mov r10,QWORD PTR[192+r8]
mov rax,QWORD PTR[8+r10*8+rax]
- jmp $L$body_proceed
+ jmp $L$common_pop_regs
$L$body_40::
mov rax,QWORD PTR[40+rax]
-$L$body_proceed::
+$L$common_pop_regs::
mov rbx,QWORD PTR[((-8))+rax]
mov rbp,QWORD PTR[((-16))+rax]
mov r12,QWORD PTR[((-24))+rax]
@@ -2483,22 +2524,22 @@ ALIGN 8
$L$SEH_info_bn_mul_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$mul_body,imagerel $L$mul_epilogue
+ DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
ALIGN 8
$L$SEH_info_bn_mul4x_mont_gather5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
+ DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
ALIGN 8
$L$SEH_info_bn_power5::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$power5_body,imagerel $L$power5_epilogue
+ DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
ALIGN 8
$L$SEH_info_bn_from_mont8x::
DB 9,0,0,0
DD imagerel mul_handler
- DD imagerel $L$from_body,imagerel $L$from_epilogue
+ DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
ALIGN 8
$L$SEH_info_bn_gather5::
DB 001h,00bh,003h,00ah
diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
index ca78bd52cc..c985159a7b 100644
--- a/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
+++ b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm
@@ -36,6 +36,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
push r13
mov r8,QWORD PTR[rsi]
+ xor r13,r13
mov r9,QWORD PTR[8+rsi]
add r8,r8
mov r10,QWORD PTR[16+rsi]
@@ -46,7 +47,7 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
adc r10,r10
adc r11,r11
mov rdx,r9
- sbb r13,r13
+ adc r13,0
sub r8,QWORD PTR[rsi]
mov rcx,r10
@@ -54,14 +55,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_2::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -180,12 +181,12 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
- cmovz r10,rcx
- cmovz r11,r12
+ cmovc r8,rax
+ cmovc r9,rdx
+ cmovc r10,rcx
+ cmovc r11,r12
xor r13,r13
add r8,QWORD PTR[rsi]
@@ -202,14 +203,14 @@ $L$SEH_begin_ecp_nistz256_mul_by_3::
sbb r10,0
mov r12,r11
sbb r11,QWORD PTR[(($L$poly+24))]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -260,14 +261,14 @@ $L$SEH_begin_ecp_nistz256_add::
sbb r10,QWORD PTR[16+rsi]
mov r12,r11
sbb r11,QWORD PTR[24+rsi]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD PTR[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD PTR[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD PTR[16+rdi],r10
mov QWORD PTR[24+rdi],r11
@@ -1167,13 +1168,14 @@ ecp_nistz256_avx2_select_w7 ENDP
ALIGN 32
__ecp_nistz256_add_toq PROC PRIVATE
+ xor r11,r11
add r12,QWORD PTR[rbx]
adc r13,QWORD PTR[8+rbx]
mov rax,r12
adc r8,QWORD PTR[16+rbx]
adc r9,QWORD PTR[24+rbx]
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1181,14 +1183,14 @@ __ecp_nistz256_add_toq PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -1256,13 +1258,14 @@ __ecp_nistz256_subq ENDP
ALIGN 32
__ecp_nistz256_mul_by_2q PROC PRIVATE
+ xor r11,r11
add r12,r12
adc r13,r13
mov rax,r12
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1270,14 +1273,14 @@ __ecp_nistz256_mul_by_2q PROC PRIVATE
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD PTR[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD PTR[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD PTR[16+rdi],r8
mov QWORD PTR[24+rdi],r9
@@ -1527,16 +1530,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
mov rsi,rdx
movdqa XMMWORD PTR[384+rsp],xmm0
movdqa XMMWORD PTR[(384+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[416+rsp],xmm2
movdqa XMMWORD PTR[(416+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[448+rsp],xmm4
movdqa XMMWORD PTR[(448+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rsi]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rsi]
movdqu xmm2,XMMWORD PTR[32+rsi]
por xmm5,xmm3
@@ -1548,14 +1549,14 @@ $L$SEH_begin_ecp_nistz256_point_add::
movdqa XMMWORD PTR[480+rsp],xmm0
pshufd xmm4,xmm5,01eh
movdqa XMMWORD PTR[(480+16)+rsp],xmm1
- por xmm1,xmm0
-DB 102,72,15,110,199
+ movdqu xmm0,XMMWORD PTR[64+rsi]
+ movdqu xmm1,XMMWORD PTR[80+rsi]
movdqa XMMWORD PTR[512+rsp],xmm2
movdqa XMMWORD PTR[(512+16)+rsp],xmm3
- por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
- por xmm3,xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199
lea rsi,QWORD PTR[((64-0))+rsi]
mov QWORD PTR[((544+0))+rsp],rax
@@ -1566,8 +1567,8 @@ DB 102,72,15,110,199
call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4
- pshufd xmm4,xmm3,0b1h
- por xmm4,xmm3
+ pshufd xmm4,xmm1,0b1h
+ por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,01eh
por xmm4,xmm3
@@ -1750,6 +1751,7 @@ $L$add_proceedq::
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[96+rsp]
adc r13,r13
@@ -1757,7 +1759,7 @@ $L$add_proceedq::
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1765,15 +1767,15 @@ $L$add_proceedq::
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
@@ -1939,16 +1941,14 @@ $L$SEH_begin_ecp_nistz256_point_add_affine::
mov r8,QWORD PTR[((64+24))+rsi]
movdqa XMMWORD PTR[320+rsp],xmm0
movdqa XMMWORD PTR[(320+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD PTR[352+rsp],xmm2
movdqa XMMWORD PTR[(352+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD PTR[384+rsp],xmm4
movdqa XMMWORD PTR[(384+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD PTR[rbx]
- pshufd xmm5,xmm3,0b1h
+ pshufd xmm3,xmm5,0b1h
movdqu xmm1,XMMWORD PTR[16+rbx]
movdqu xmm2,XMMWORD PTR[32+rbx]
por xmm5,xmm3
@@ -2066,6 +2066,7 @@ DB 102,72,15,110,199
+ xor r11,r11
add r12,r12
lea rsi,QWORD PTR[192+rsp]
adc r13,r13
@@ -2073,7 +2074,7 @@ DB 102,72,15,110,199
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -2081,15 +2082,15 @@ DB 102,72,15,110,199
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD PTR[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD PTR[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD PTR[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD PTR[24+rsi]
call __ecp_nistz256_subq
diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm
index 07b7882a96..7528c8a8a6 100644
--- a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm
+++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm
@@ -1283,9 +1283,9 @@ DB 102,15,56,0,251
ALIGN 16
$L$oop_shaext::
dec rdx
- lea rax,QWORD PTR[64+rsi]
+ lea r8,QWORD PTR[64+rsi]
paddd xmm1,xmm4
- cmovne rsi,rax
+ cmovne rsi,r8
movdqa xmm8,xmm0
DB 15,56,201,229
movdqa xmm2,xmm0
diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s
index 2f7211d92e..b683577231 100644
--- a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s
+++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s
@@ -15,44 +15,51 @@ bn_mul_mont:
jl .L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+ jmp .L002page_walk_done
+.align 16
.L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc .L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+.L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
+ movl %edx,24(%esp)
leal OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
- jnc .L002non_sse2
+ jnc .L003non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -76,7 +83,7 @@ bn_mul_mont:
psrlq $32,%mm3
incl %ecx
.align 16
-.L0031st:
+.L0041st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -91,7 +98,7 @@ bn_mul_mont:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl .L0031st
+ jl .L0041st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -105,7 +112,7 @@ bn_mul_mont:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-.L004outer:
+.L005outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -127,7 +134,7 @@ bn_mul_mont:
paddq %mm6,%mm2
incl %ecx
decl %ebx
-.L005inner:
+.L006inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -144,7 +151,7 @@ bn_mul_mont:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz .L005inner
+ jnz .L006inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -162,11 +169,11 @@ bn_mul_mont:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle .L004outer
+ jle .L005outer
emms
- jmp .L006common_tail
+ jmp .L007common_tail
.align 16
-.L002non_sse2:
+.L003non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -177,12 +184,12 @@ bn_mul_mont:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz .L007bn_sqr_mont
+ jz .L008bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 16
-.L008mull:
+.L009mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -191,7 +198,7 @@ bn_mul_mont:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L008mull
+ jl .L009mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -209,9 +216,9 @@ bn_mul_mont:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp .L0092ndmadd
+ jmp .L0102ndmadd
.align 16
-.L0101stmadd:
+.L0111stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -222,7 +229,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl .L0101stmadd
+ jl .L0111stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -245,7 +252,7 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
.align 16
-.L0092ndmadd:
+.L0102ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -256,7 +263,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0092ndmadd
+ jl .L0102ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -272,16 +279,16 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp .L0101stmadd
+ jmp .L0111stmadd
.align 16
-.L007bn_sqr_mont:
+.L008bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -292,7 +299,7 @@ bn_mul_mont:
andl $1,%ebx
incl %ecx
.align 16
-.L011sqr:
+.L012sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -304,7 +311,7 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl .L011sqr
+ jl .L012sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -328,7 +335,7 @@ bn_mul_mont:
movl 4(%esi),%eax
movl $1,%ecx
.align 16
-.L0123rdmadd:
+.L0133rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -347,7 +354,7 @@ bn_mul_mont:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl .L0123rdmadd
+ jl .L0133rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -363,7 +370,7 @@ bn_mul_mont:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je .L006common_tail
+ je .L007common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -375,12 +382,12 @@ bn_mul_mont:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je .L013sqrlast
+ je .L014sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 16
-.L014sqradd:
+.L015sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -396,13 +403,13 @@ bn_mul_mont:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle .L014sqradd
+ jle .L015sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-.L013sqrlast:
+.L014sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -417,9 +424,9 @@ bn_mul_mont:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp .L0123rdmadd
+ jmp .L0133rdmadd
.align 16
-.L006common_tail:
+.L007common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -427,13 +434,13 @@ bn_mul_mont:
movl %ebx,%ecx
xorl %edx,%edx
.align 16
-.L015sub:
+.L016sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge .L015sub
+ jge .L016sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@@ -441,12 +448,12 @@ bn_mul_mont:
andl %eax,%ebp
orl %ebp,%esi
.align 16
-.L016copy:
+.L017copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge .L016copy
+ jge .L017copy
movl 24(%esp),%esp
movl $1,%eax
.L000just_leave:
diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s
index accec0e519..7bc58d24e0 100644
--- a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s
+++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s
@@ -14,47 +14,54 @@ L_bn_mul_mont_begin:
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
- movl %esp,%ebp
addl $2,%edi
negl %edi
- leal -32(%esp,%edi,4),%esp
+ leal -32(%esp,%edi,4),%ebp
negl %edi
- movl %esp,%eax
+ movl %ebp,%eax
subl %edx,%eax
andl $2047,%eax
- subl %eax,%esp
- xorl %esp,%edx
+ subl %eax,%ebp
+ xorl %ebp,%edx
andl $2048,%edx
xorl $2048,%edx
- subl %edx,%esp
- andl $-64,%esp
- movl %ebp,%eax
- subl %esp,%eax
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+ jmp L002page_walk_done
+.align 4,0x90
L001page_walk:
- movl (%esp,%eax,1),%edx
- subl $4096,%eax
-.byte 46
- jnc L001page_walk
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja L001page_walk
+L002page_walk_done:
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
- movl 12(%esi),%edx
+ movl 12(%esi),%ebp
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
- movl %edx,16(%esp)
+ movl %ebp,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
- movl %ebp,24(%esp)
- call L002PIC_me_up
-L002PIC_me_up:
+ movl %edx,24(%esp)
+ call L003PIC_me_up
+L003PIC_me_up:
popl %eax
- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L002PIC_me_up(%eax),%eax
+ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
btl $26,(%eax)
- jnc L003non_sse2
+ jnc L004non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
@@ -78,7 +85,7 @@ L002PIC_me_up:
psrlq $32,%mm3
incl %ecx
.align 4,0x90
-L0041st:
+L0051st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -93,7 +100,7 @@ L0041st:
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
- jl L0041st
+ jl L0051st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -107,7 +114,7 @@ L0041st:
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
-L005outer:
+L006outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
@@ -129,7 +136,7 @@ L005outer:
paddq %mm6,%mm2
incl %ecx
decl %ebx
-L006inner:
+L007inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
@@ -146,7 +153,7 @@ L006inner:
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
- jnz L006inner
+ jnz L007inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
@@ -164,11 +171,11 @@ L006inner:
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
- jle L005outer
+ jle L006outer
emms
- jmp L007common_tail
+ jmp L008common_tail
.align 4,0x90
-L003non_sse2:
+L004non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
@@ -179,12 +186,12 @@ L003non_sse2:
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
- jz L008bn_sqr_mont
+ jz L009bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
-L009mull:
+L010mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
@@ -193,7 +200,7 @@ L009mull:
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L009mull
+ jl L010mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
@@ -211,9 +218,9 @@ L009mull:
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
- jmp L0102ndmadd
+ jmp L0112ndmadd
.align 4,0x90
-L0111stmadd:
+L0121stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -224,7 +231,7 @@ L0111stmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
- jl L0111stmadd
+ jl L0121stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
@@ -247,7 +254,7 @@ L0111stmadd:
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
-L0102ndmadd:
+L0112ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -258,7 +265,7 @@ L0102ndmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0102ndmadd
+ jl L0112ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -274,16 +281,16 @@ L0102ndmadd:
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
- je L007common_tail
+ je L008common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
- jmp L0111stmadd
+ jmp L0121stmadd
.align 4,0x90
-L008bn_sqr_mont:
+L009bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
@@ -294,7 +301,7 @@ L008bn_sqr_mont:
andl $1,%ebx
incl %ecx
.align 4,0x90
-L012sqr:
+L013sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -306,7 +313,7 @@ L012sqr:
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
- jl L012sqr
+ jl L013sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -330,7 +337,7 @@ L012sqr:
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
-L0133rdmadd:
+L0143rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
@@ -349,7 +356,7 @@ L0133rdmadd:
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
- jl L0133rdmadd
+ jl L0143rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
@@ -365,7 +372,7 @@ L0133rdmadd:
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
- je L007common_tail
+ je L008common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
@@ -377,12 +384,12 @@ L0133rdmadd:
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
- je L014sqrlast
+ je L015sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
-L015sqradd:
+L016sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
@@ -398,13 +405,13 @@ L015sqradd:
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
- jle L015sqradd
+ jle L016sqradd
movl %edx,%ebp
addl %edx,%edx
shrl $31,%ebp
addl %ebx,%edx
adcl $0,%ebp
-L014sqrlast:
+L015sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
@@ -419,9 +426,9 @@ L014sqrlast:
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
- jmp L0133rdmadd
+ jmp L0143rdmadd
.align 4,0x90
-L007common_tail:
+L008common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
@@ -429,13 +436,13 @@ L007common_tail:
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
-L016sub:
+L017sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
- jge L016sub
+ jge L017sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
@@ -443,12 +450,12 @@ L016sub:
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
-L017copy:
+L018copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
- jge L017copy
+ jge L018copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm
index 4987f6fe91..f026dae738 100644
--- a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm
+++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm
@@ -31,44 +31,51 @@ $L_bn_mul_mont_begin::
jl $L000just_leave
lea esi,DWORD PTR 20[esp]
lea edx,DWORD PTR 24[esp]
- mov ebp,esp
add edi,2
neg edi
- lea esp,DWORD PTR [edi*4+esp-32]
+ lea ebp,DWORD PTR [edi*4+esp-32]
neg edi
- mov eax,esp
+ mov eax,ebp
sub eax,edx
and eax,2047
- sub esp,eax
- xor edx,esp
+ sub ebp,eax
+ xor edx,ebp
and edx,2048
xor edx,2048
- sub esp,edx
- and esp,-64
- mov eax,ebp
- sub eax,esp
+ sub ebp,edx
+ and ebp,-64
+ mov eax,esp
+ sub eax,ebp
and eax,-4096
+ mov edx,esp
+ lea esp,DWORD PTR [eax*1+ebp]
+ mov eax,DWORD PTR [esp]
+ cmp esp,ebp
+ ja $L001page_walk
+ jmp $L002page_walk_done
+ALIGN 16
$L001page_walk:
- mov edx,DWORD PTR [eax*1+esp]
- sub eax,4096
-DB 46
- jnc $L001page_walk
+ lea esp,DWORD PTR [esp-4096]
+ mov eax,DWORD PTR [esp]
+ cmp esp,ebp
+ ja $L001page_walk
+$L002page_walk_done:
mov eax,DWORD PTR [esi]
mov ebx,DWORD PTR 4[esi]
mov ecx,DWORD PTR 8[esi]
- mov edx,DWORD PTR 12[esi]
+ mov ebp,DWORD PTR 12[esi]
mov esi,DWORD PTR 16[esi]
mov esi,DWORD PTR [esi]
mov DWORD PTR 4[esp],eax
mov DWORD PTR 8[esp],ebx
mov DWORD PTR 12[esp],ecx
- mov DWORD PTR 16[esp],edx
+ mov DWORD PTR 16[esp],ebp
mov DWORD PTR 20[esp],esi
lea ebx,DWORD PTR [edi-3]
- mov DWORD PTR 24[esp],ebp
+ mov DWORD PTR 24[esp],edx
lea eax,DWORD PTR _OPENSSL_ia32cap_P
bt DWORD PTR [eax],26
- jnc $L002non_sse2
+ jnc $L003non_sse2
mov eax,-1
movd mm7,eax
mov esi,DWORD PTR 8[esp]
@@ -92,7 +99,7 @@ DB 46
psrlq mm3,32
inc ecx
ALIGN 16
-$L0031st:
+$L0041st:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -107,7 +114,7 @@ $L0031st:
psrlq mm3,32
lea ecx,DWORD PTR 1[ecx]
cmp ecx,ebx
- jl $L0031st
+ jl $L0041st
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -121,7 +128,7 @@ $L0031st:
paddq mm3,mm2
movq QWORD PTR 32[ebx*4+esp],mm3
inc edx
-$L004outer:
+$L005outer:
xor ecx,ecx
movd mm4,DWORD PTR [edx*4+edi]
movd mm5,DWORD PTR [esi]
@@ -143,7 +150,7 @@ $L004outer:
paddq mm2,mm6
inc ecx
dec ebx
-$L005inner:
+$L006inner:
pmuludq mm0,mm4
pmuludq mm1,mm5
paddq mm2,mm0
@@ -160,7 +167,7 @@ $L005inner:
paddq mm2,mm6
dec ebx
lea ecx,DWORD PTR 1[ecx]
- jnz $L005inner
+ jnz $L006inner
mov ebx,ecx
pmuludq mm0,mm4
pmuludq mm1,mm5
@@ -178,11 +185,11 @@ $L005inner:
movq QWORD PTR 32[ebx*4+esp],mm3
lea edx,DWORD PTR 1[edx]
cmp edx,ebx
- jle $L004outer
+ jle $L005outer
emms
- jmp $L006common_tail
+ jmp $L007common_tail
ALIGN 16
-$L002non_sse2:
+$L003non_sse2:
mov esi,DWORD PTR 8[esp]
lea ebp,DWORD PTR 1[ebx]
mov edi,DWORD PTR 12[esp]
@@ -193,12 +200,12 @@ $L002non_sse2:
lea eax,DWORD PTR 4[ebx*4+edi]
or ebp,edx
mov edi,DWORD PTR [edi]
- jz $L007bn_sqr_mont
+ jz $L008bn_sqr_mont
mov DWORD PTR 28[esp],eax
mov eax,DWORD PTR [esi]
xor edx,edx
ALIGN 16
-$L008mull:
+$L009mull:
mov ebp,edx
mul edi
add ebp,eax
@@ -207,7 +214,7 @@ $L008mull:
mov eax,DWORD PTR [ecx*4+esi]
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L008mull
+ jl $L009mull
mov ebp,edx
mul edi
mov edi,DWORD PTR 20[esp]
@@ -225,9 +232,9 @@ $L008mull:
mov eax,DWORD PTR 4[esi]
adc edx,0
inc ecx
- jmp $L0092ndmadd
+ jmp $L0102ndmadd
ALIGN 16
-$L0101stmadd:
+$L0111stmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -238,7 +245,7 @@ $L0101stmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L0101stmadd
+ jl $L0111stmadd
mov ebp,edx
mul edi
add eax,DWORD PTR 32[ebx*4+esp]
@@ -261,7 +268,7 @@ $L0101stmadd:
adc edx,0
mov ecx,1
ALIGN 16
-$L0092ndmadd:
+$L0102ndmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -272,7 +279,7 @@ $L0092ndmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
- jl $L0092ndmadd
+ jl $L0102ndmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@@ -288,16 +295,16 @@ $L0092ndmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,DWORD PTR 28[esp]
mov DWORD PTR 36[ebx*4+esp],eax
- je $L006common_tail
+ je $L007common_tail
mov edi,DWORD PTR [ecx]
mov esi,DWORD PTR 8[esp]
mov DWORD PTR 12[esp],ecx
xor ecx,ecx
xor edx,edx
mov eax,DWORD PTR [esi]
- jmp $L0101stmadd
+ jmp $L0111stmadd
ALIGN 16
-$L007bn_sqr_mont:
+$L008bn_sqr_mont:
mov DWORD PTR [esp],ebx
mov DWORD PTR 12[esp],ecx
mov eax,edi
@@ -308,7 +315,7 @@ $L007bn_sqr_mont:
and ebx,1
inc ecx
ALIGN 16
-$L011sqr:
+$L012sqr:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -320,7 +327,7 @@ $L011sqr:
cmp ecx,DWORD PTR [esp]
mov ebx,eax
mov DWORD PTR 28[ecx*4+esp],ebp
- jl $L011sqr
+ jl $L012sqr
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -344,7 +351,7 @@ $L011sqr:
mov eax,DWORD PTR 4[esi]
mov ecx,1
ALIGN 16
-$L0123rdmadd:
+$L0133rdmadd:
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ecx*4+esp]
@@ -363,7 +370,7 @@ $L0123rdmadd:
adc edx,0
cmp ecx,ebx
mov DWORD PTR 24[ecx*4+esp],ebp
- jl $L0123rdmadd
+ jl $L0133rdmadd
mov ebp,edx
mul edi
add ebp,DWORD PTR 32[ebx*4+esp]
@@ -379,7 +386,7 @@ $L0123rdmadd:
mov DWORD PTR 32[ebx*4+esp],edx
cmp ecx,ebx
mov DWORD PTR 36[ebx*4+esp],eax
- je $L006common_tail
+ je $L007common_tail
mov edi,DWORD PTR 4[ecx*4+esi]
lea ecx,DWORD PTR 1[ecx]
mov eax,edi
@@ -391,12 +398,12 @@ $L0123rdmadd:
xor ebp,ebp
cmp ecx,ebx
lea ecx,DWORD PTR 1[ecx]
- je $L013sqrlast
+ je $L014sqrlast
mov ebx,edx
shr edx,1
and ebx,1
ALIGN 16
-$L014sqradd:
+$L015sqradd:
mov eax,DWORD PTR [ecx*4+esi]
mov ebp,edx
mul edi
@@ -412,13 +419,13 @@ $L014sqradd:
cmp ecx,DWORD PTR [esp]
mov DWORD PTR 28[ecx*4+esp],ebp
mov ebx,eax
- jle $L014sqradd
+ jle $L015sqradd
mov ebp,edx
add edx,edx
shr ebp,31
add edx,ebx
adc ebp,0
-$L013sqrlast:
+$L014sqrlast:
mov edi,DWORD PTR 20[esp]
mov esi,DWORD PTR 16[esp]
imul edi,DWORD PTR 32[esp]
@@ -433,9 +440,9 @@ $L013sqrlast:
adc edx,0
mov ecx,1
mov eax,DWORD PTR 4[esi]
- jmp $L0123rdmadd
+ jmp $L0133rdmadd
ALIGN 16
-$L006common_tail:
+$L007common_tail:
mov ebp,DWORD PTR 16[esp]
mov edi,DWORD PTR 4[esp]
lea esi,DWORD PTR 32[esp]
@@ -443,13 +450,13 @@ $L006common_tail:
mov ecx,ebx
xor edx,edx
ALIGN 16
-$L015sub:
+$L016sub:
sbb eax,DWORD PTR [edx*4+ebp]
mov DWORD PTR [edx*4+edi],eax
dec ecx
mov eax,DWORD PTR 4[edx*4+esi]
lea edx,DWORD PTR 1[edx]
- jge $L015sub
+ jge $L016sub
sbb eax,0
and esi,eax
not eax
@@ -457,12 +464,12 @@ $L015sub:
and ebp,eax
or esi,ebp
ALIGN 16
-$L016copy:
+$L017copy:
mov eax,DWORD PTR [ebx*4+esi]
mov DWORD PTR [ebx*4+edi],eax
mov DWORD PTR 32[ebx*4+esp],ecx
dec ebx
- jge $L016copy
+ jge $L017copy
mov esp,DWORD PTR 24[esp]
mov eax,1
$L000just_leave: