diff options
author | Sam Roberts <vieuxtech@gmail.com> | 2018-11-22 11:47:07 -0800 |
---|---|---|
committer | Sam Roberts <vieuxtech@gmail.com> | 2019-01-22 13:33:54 -0800 |
commit | 807ed7883a12423270450776f015a7c2348c0913 (patch) | |
tree | 00ec21dd290b29c782680ffc2f97e6d59fd2ab2f /deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec | |
parent | 57119fbdb200702d6e2cf23428de4c458ae86bbc (diff) | |
download | android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.gz android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.bz2 android-node-v8-807ed7883a12423270450776f015a7c2348c0913.zip |
deps: update archs files for OpenSSL-1.1.1a
`cd deps/openssl/config; make` updates all arch-dependent files.
PR-URL: https://github.com/nodejs/node/pull/25381
Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com>
Reviewed-By: Shigeki Ohtsu <ohtsu@ohtsu.org>
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec')
-rw-r--r-- | deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm | 1837 | ||||
-rw-r--r-- | deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm | 1054 |
2 files changed, 2819 insertions, 72 deletions
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm index e0c40d6ec4..9ef88ef1c8 100644 --- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm +++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm @@ -2399,6 +2399,12 @@ $L$Three: $L$ONE_mont: DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe + +$L$ord: + DQ 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +$L$ordK: + DQ 0xccd1c8aaee00bc4f + global ecp_nistz256_mul_by_2 ALIGN 64 @@ -2411,9 +2417,13 @@ $L$SEH_begin_ecp_nistz256_mul_by_2: mov rsi,rdx + push r12 + push r13 +$L$mul_by_2_body: + mov r8,QWORD[rsi] xor r13,r13 mov r9,QWORD[8+rsi] @@ -2445,11 +2455,17 @@ $L$SEH_begin_ecp_nistz256_mul_by_2: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$mul_by_2_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_mul_by_2: @@ -2466,9 +2482,13 @@ $L$SEH_begin_ecp_nistz256_div_by_2: mov rsi,rdx + push r12 + push r13 +$L$div_by_2_body: + mov r8,QWORD[rsi] mov r9,QWORD[8+rsi] mov r10,QWORD[16+rsi] @@ -2515,11 +2535,17 @@ $L$SEH_begin_ecp_nistz256_div_by_2: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$div_by_2_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_div_by_2: @@ -2536,9 +2562,13 @@ $L$SEH_begin_ecp_nistz256_mul_by_3: mov rsi,rdx + push r12 + push r13 +$L$mul_by_3_body: + mov r8,QWORD[rsi] xor r13,r13 mov r9,QWORD[8+rsi] @@ -2591,11 +2621,17 @@ $L$SEH_begin_ecp_nistz256_mul_by_3: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea 
rsp,[16+rsp] + +$L$mul_by_3_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_mul_by_3: @@ -2613,9 +2649,13 @@ $L$SEH_begin_ecp_nistz256_add: mov rdx,r8 + push r12 + push r13 +$L$add_body: + mov r8,QWORD[rsi] xor r13,r13 mov r9,QWORD[8+rsi] @@ -2648,11 +2688,17 @@ $L$SEH_begin_ecp_nistz256_add: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$add_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_add: @@ -2670,9 +2716,13 @@ $L$SEH_begin_ecp_nistz256_sub: mov rdx,r8 + push r12 + push r13 +$L$sub_body: + mov r8,QWORD[rsi] xor r13,r13 mov r9,QWORD[8+rsi] @@ -2705,11 +2755,17 @@ $L$SEH_begin_ecp_nistz256_sub: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$sub_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_sub: @@ -2726,9 +2782,13 @@ $L$SEH_begin_ecp_nistz256_neg: mov rsi,rdx + push r12 + push r13 +$L$neg_body: + xor r8,r8 xor r9,r9 xor r10,r10 @@ -2761,16 +2821,1131 @@ $L$SEH_begin_ecp_nistz256_neg: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$neg_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_neg: + + +global ecp_nistz256_ord_mul_mont + +ALIGN 32 +ecp_nistz256_ord_mul_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_mul_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + mov ecx,0x80100 + and ecx,DWORD[((OPENSSL_ia32cap_P+8))] + cmp ecx,0x80100 + je NEAR $L$ecp_nistz256_ord_mul_montx + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + 
+$L$ord_mul_body: + + mov rax,QWORD[rdx] + mov rbx,rdx + lea r14,[$L$ord] + mov r15,QWORD[$L$ordK] + + + mov rcx,rax + mul QWORD[rsi] + mov r8,rax + mov rax,rcx + mov r9,rdx + + mul QWORD[8+rsi] + add r9,rax + mov rax,rcx + adc rdx,0 + mov r10,rdx + + mul QWORD[16+rsi] + add r10,rax + mov rax,rcx + adc rdx,0 + + mov r13,r8 + imul r8,r15 + + mov r11,rdx + mul QWORD[24+rsi] + add r11,rax + mov rax,r8 + adc rdx,0 + mov r12,rdx + + + mul QWORD[r14] + mov rbp,r8 + add r13,rax + mov rax,r8 + adc rdx,0 + mov rcx,rdx + + sub r10,r8 + sbb r8,0 + + mul QWORD[8+r14] + add r9,rcx + adc rdx,0 + add r9,rax + mov rax,rbp + adc r10,rdx + mov rdx,rbp + adc r8,0 + + shl rax,32 + shr rdx,32 + sub r11,rax + mov rax,QWORD[8+rbx] + sbb rbp,rdx + + add r11,r8 + adc r12,rbp + adc r13,0 + + + mov rcx,rax + mul QWORD[rsi] + add r9,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r10,rbp + adc rdx,0 + add r10,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r11,rbp + adc rdx,0 + add r11,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r9 + imul r9,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r12,rbp + adc rdx,0 + xor r8,r8 + add r12,rax + mov rax,r9 + adc r13,rdx + adc r8,0 + + + mul QWORD[r14] + mov rbp,r9 + add rcx,rax + mov rax,r9 + adc rcx,rdx + + sub r11,r9 + sbb r9,0 + + mul QWORD[8+r14] + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,rbp + adc r11,rdx + mov rdx,rbp + adc r9,0 + + shl rax,32 + shr rdx,32 + sub r12,rax + mov rax,QWORD[16+rbx] + sbb rbp,rdx + + add r12,r9 + adc r13,rbp + adc r8,0 + + + mov rcx,rax + mul QWORD[rsi] + add r10,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r11,rbp + adc rdx,0 + add r11,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r12,rbp + adc rdx,0 + add r12,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r10 + imul r10,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r13,rbp + adc rdx,0 + xor r9,r9 + add r13,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + + mul QWORD[r14] + 
mov rbp,r10 + add rcx,rax + mov rax,r10 + adc rcx,rdx + + sub r12,r10 + sbb r10,0 + + mul QWORD[8+r14] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc r12,rdx + mov rdx,rbp + adc r10,0 + + shl rax,32 + shr rdx,32 + sub r13,rax + mov rax,QWORD[24+rbx] + sbb rbp,rdx + + add r13,r10 + adc r8,rbp + adc r9,0 + + + mov rcx,rax + mul QWORD[rsi] + add r11,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[8+rsi] + add r12,rbp + adc rdx,0 + add r12,rax + mov rax,rcx + adc rdx,0 + mov rbp,rdx + + mul QWORD[16+rsi] + add r13,rbp + adc rdx,0 + add r13,rax + mov rax,rcx + adc rdx,0 + + mov rcx,r11 + imul r11,r15 + + mov rbp,rdx + mul QWORD[24+rsi] + add r8,rbp + adc rdx,0 + xor r10,r10 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + + mul QWORD[r14] + mov rbp,r11 + add rcx,rax + mov rax,r11 + adc rcx,rdx + + sub r13,r11 + sbb r11,0 + + mul QWORD[8+r14] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc r13,rdx + mov rdx,rbp + adc r11,0 + + shl rax,32 + shr rdx,32 + sub r8,rax + sbb rbp,rdx + + add r8,r11 + adc r9,rbp + adc r10,0 + + + mov rsi,r12 + sub r12,QWORD[r14] + mov r11,r13 + sbb r13,QWORD[8+r14] + mov rcx,r8 + sbb r8,QWORD[16+r14] + mov rbp,r9 + sbb r9,QWORD[24+r14] + sbb r10,0 + + cmovc r12,rsi + cmovc r13,r11 + cmovc r8,rcx + cmovc r9,rbp + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ecp_nistz256_ord_mul_mont: + + + + + + + +global ecp_nistz256_ord_sqr_mont + +ALIGN 32 +ecp_nistz256_ord_sqr_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_sqr_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + mov ecx,0x80100 + and 
ecx,DWORD[((OPENSSL_ia32cap_P+8))] + cmp ecx,0x80100 + je NEAR $L$ecp_nistz256_ord_sqr_montx + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_sqr_body: + + mov r8,QWORD[rsi] + mov rax,QWORD[8+rsi] + mov r14,QWORD[16+rsi] + mov r15,QWORD[24+rsi] + lea rsi,[$L$ord] + mov rbx,rdx + jmp NEAR $L$oop_ord_sqr + +ALIGN 32 +$L$oop_ord_sqr: + + mov rbp,rax + mul r8 + mov r9,rax +DB 102,72,15,110,205 + mov rax,r14 + mov r10,rdx + + mul r8 + add r10,rax + mov rax,r15 +DB 102,73,15,110,214 + adc rdx,0 + mov r11,rdx + + mul r8 + add r11,rax + mov rax,r15 +DB 102,73,15,110,223 + adc rdx,0 + mov r12,rdx + + + mul r14 + mov r13,rax + mov rax,r14 + mov r14,rdx + + + mul rbp + add r11,rax + mov rax,r15 + adc rdx,0 + mov r15,rdx + + mul rbp + add r12,rax + adc rdx,0 + + add r12,r15 + adc r13,rdx + adc r14,0 + + + xor r15,r15 + mov rax,r8 + add r9,r9 + adc r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,0 + + + mul rax + mov r8,rax +DB 102,72,15,126,200 + mov rbp,rdx + + mul rax + add r9,rbp + adc r10,rax +DB 102,72,15,126,208 + adc rdx,0 + mov rbp,rdx + + mul rax + add r11,rbp + adc r12,rax +DB 102,72,15,126,216 + adc rdx,0 + mov rbp,rdx + + mov rcx,r8 + imul r8,QWORD[32+rsi] + + mul rax + add r13,rbp + adc r14,rax + mov rax,QWORD[rsi] + adc r15,rdx + + + mul r8 + mov rbp,r8 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r10,r8 + sbb rbp,0 + + mul r8 + add r9,rcx + adc rdx,0 + add r9,rax + mov rax,r8 + adc r10,rdx + mov rdx,r8 + adc rbp,0 + + mov rcx,r9 + imul r9,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r11,rax + mov rax,QWORD[rsi] + sbb r8,rdx + + add r11,rbp + adc r8,0 + + + mul r9 + mov rbp,r9 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r11,r9 + sbb rbp,0 + + mul r9 + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,r9 + adc r11,rdx + mov rdx,r9 + adc rbp,0 + + mov rcx,r10 + imul r10,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r8,rax + mov rax,QWORD[rsi] + sbb r9,rdx + + add r8,rbp + 
adc r9,0 + + + mul r10 + mov rbp,r10 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r8,r10 + sbb rbp,0 + + mul r10 + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,r10 + adc r8,rdx + mov rdx,r10 + adc rbp,0 + + mov rcx,r11 + imul r11,QWORD[32+rsi] + + shl rax,32 + shr rdx,32 + sub r9,rax + mov rax,QWORD[rsi] + sbb r10,rdx + + add r9,rbp + adc r10,0 + + + mul r11 + mov rbp,r11 + add rcx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + + sub r9,r11 + sbb rbp,0 + + mul r11 + add r8,rcx + adc rdx,0 + add r8,rax + mov rax,r11 + adc r9,rdx + mov rdx,r11 + adc rbp,0 + + shl rax,32 + shr rdx,32 + sub r10,rax + sbb r11,rdx + + add r10,rbp + adc r11,0 + + + xor rdx,rdx + add r8,r12 + adc r9,r13 + mov r12,r8 + adc r10,r14 + adc r11,r15 + mov rax,r9 + adc rdx,0 + + + sub r8,QWORD[rsi] + mov r14,r10 + sbb r9,QWORD[8+rsi] + sbb r10,QWORD[16+rsi] + mov r15,r11 + sbb r11,QWORD[24+rsi] + sbb rdx,0 + + cmovc r8,r12 + cmovnc rax,r9 + cmovnc r14,r10 + cmovnc r15,r11 + + dec rbx + jnz NEAR $L$oop_ord_sqr + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],rax + pxor xmm1,xmm1 + mov QWORD[16+rdi],r14 + pxor xmm2,xmm2 + mov QWORD[24+rdi],r15 + pxor xmm3,xmm3 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ecp_nistz256_ord_sqr_mont: + + +ALIGN 32 +ecp_nistz256_ord_mul_montx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_mul_montx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$ecp_nistz256_ord_mul_montx: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_mulx_body: + + mov rbx,rdx + mov rdx,QWORD[rdx] + mov r9,QWORD[rsi] + mov r10,QWORD[8+rsi] + mov r11,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + lea rsi,[((-128))+rsi] + lea r14,[(($L$ord-128))] + mov 
r15,QWORD[$L$ordK] + + + mulx r9,r8,r9 + mulx r10,rcx,r10 + mulx r11,rbp,r11 + add r9,rcx + mulx r12,rcx,r12 + mov rdx,r8 + mulx rax,rdx,r15 + adc r10,rbp + adc r11,rcx + adc r12,0 + + + xor r13,r13 + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r8,rcx + adox r9,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[8+rbx] + adcx r11,rcx + adox r12,rbp + adcx r12,r8 + adox r13,r8 + adc r13,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r9 + mulx rax,rdx,r15 + adcx r12,rcx + adox r13,rbp + + adcx r13,r8 + adox r8,r8 + adc r8,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[16+rbx] + adcx r12,rcx + adox r13,rbp + adcx r13,r9 + adox r8,r9 + adc r8,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r10 + mulx rax,rdx,r15 + adcx r13,rcx + adox r8,rbp + + adcx r8,r9 + adox r9,r9 + adc r9,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + mov rdx,QWORD[24+rbx] + adcx r13,rcx + adox r8,rbp + adcx r8,r10 + adox r9,r10 + adc r9,0 + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx 
rbp,rcx,QWORD[((8+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r11 + mulx rax,rdx,r15 + adcx r8,rcx + adox r9,rbp + + adcx r9,r10 + adox r10,r10 + adc r10,0 + + + mulx rbp,rcx,QWORD[((0+128))+r14] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((8+128))+r14] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+r14] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+r14] + lea r14,[128+r14] + mov rbx,r12 + adcx r8,rcx + adox r9,rbp + mov rdx,r13 + adcx r9,r11 + adox r10,r11 + adc r10,0 + + + + mov rcx,r8 + sub r12,QWORD[r14] + sbb r13,QWORD[8+r14] + sbb r8,QWORD[16+r14] + mov rbp,r9 + sbb r9,QWORD[24+r14] + sbb r10,0 + + cmovc r12,rbx + cmovc r13,rdx + cmovc r8,rcx + cmovc r9,rbp + + mov QWORD[rdi],r12 + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_mulx_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ecp_nistz256_ord_mul_montx: + + +ALIGN 32 +ecp_nistz256_ord_sqr_montx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_ord_sqr_montx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$ecp_nistz256_ord_sqr_montx: + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + +$L$ord_sqrx_body: + + mov rbx,rdx + mov rdx,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r15,QWORD[16+rsi] + mov r8,QWORD[24+rsi] + lea rsi,[$L$ord] + jmp NEAR $L$oop_ord_sqrx + +ALIGN 32 +$L$oop_ord_sqrx: + mulx r10,r9,r14 + mulx r11,rcx,r15 + mov rax,rdx +DB 102,73,15,110,206 + mulx r12,rbp,r8 + mov rdx,r14 + add r10,rcx +DB 102,73,15,110,215 + adc r11,rbp + adc r12,0 + xor r13,r13 + + mulx rbp,rcx,r15 + adcx r11,rcx + adox 
r12,rbp + + mulx rbp,rcx,r8 + mov rdx,r15 + adcx r12,rcx + adox r13,rbp + adc r13,0 + + mulx r14,rcx,r8 + mov rdx,rax +DB 102,73,15,110,216 + xor r15,r15 + adcx r9,r9 + adox r13,rcx + adcx r10,r10 + adox r14,r15 + + + mulx rbp,r8,rdx +DB 102,72,15,126,202 + adcx r11,r11 + adox r9,rbp + adcx r12,r12 + mulx rax,rcx,rdx +DB 102,72,15,126,210 + adcx r13,r13 + adox r10,rcx + adcx r14,r14 + mulx rbp,rcx,rdx +DB 0x67 +DB 102,72,15,126,218 + adox r11,rax + adcx r15,r15 + adox r12,rcx + adox r13,rbp + mulx rax,rcx,rdx + adox r14,rcx + adox r15,rax + + + mov rdx,r8 + mulx rcx,rdx,QWORD[32+rsi] + + xor rax,rax + mulx rbp,rcx,QWORD[rsi] + adcx r8,rcx + adox r9,rbp + mulx rbp,rcx,QWORD[8+rsi] + adcx r9,rcx + adox r10,rbp + mulx rbp,rcx,QWORD[16+rsi] + adcx r10,rcx + adox r11,rbp + mulx rbp,rcx,QWORD[24+rsi] + adcx r11,rcx + adox r8,rbp + adcx r8,rax + + + mov rdx,r9 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adox r9,rcx + adcx r10,rbp + mulx rbp,rcx,QWORD[8+rsi] + adox r10,rcx + adcx r11,rbp + mulx rbp,rcx,QWORD[16+rsi] + adox r11,rcx + adcx r8,rbp + mulx rbp,rcx,QWORD[24+rsi] + adox r8,rcx + adcx r9,rbp + adox r9,rax + + + mov rdx,r10 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adcx r10,rcx + adox r11,rbp + mulx rbp,rcx,QWORD[8+rsi] + adcx r11,rcx + adox r8,rbp + mulx rbp,rcx,QWORD[16+rsi] + adcx r8,rcx + adox r9,rbp + mulx rbp,rcx,QWORD[24+rsi] + adcx r9,rcx + adox r10,rbp + adcx r10,rax + + + mov rdx,r11 + mulx rcx,rdx,QWORD[32+rsi] + + mulx rbp,rcx,QWORD[rsi] + adox r11,rcx + adcx r8,rbp + mulx rbp,rcx,QWORD[8+rsi] + adox r8,rcx + adcx r9,rbp + mulx rbp,rcx,QWORD[16+rsi] + adox r9,rcx + adcx r10,rbp + mulx rbp,rcx,QWORD[24+rsi] + adox r10,rcx + adcx r11,rbp + adox r11,rax + + + add r12,r8 + adc r9,r13 + mov rdx,r12 + adc r10,r14 + adc r11,r15 + mov r14,r9 + adc rax,0 + + + sub r12,QWORD[rsi] + mov r15,r10 + sbb r9,QWORD[8+rsi] + sbb r10,QWORD[16+rsi] + mov r8,r11 + sbb r11,QWORD[24+rsi] + sbb rax,0 + + cmovnc rdx,r12 + cmovnc r14,r9 + cmovnc 
r15,r10 + cmovnc r8,r11 + + dec rbx + jnz NEAR $L$oop_ord_sqrx + + mov QWORD[rdi],rdx + mov QWORD[8+rdi],r14 + pxor xmm1,xmm1 + mov QWORD[16+rdi],r15 + pxor xmm2,xmm2 + mov QWORD[24+rdi],r8 + pxor xmm3,xmm3 + + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$ord_sqrx_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ecp_nistz256_ord_sqr_montx: + + + + global ecp_nistz256_to_mont ALIGN 32 @@ -2808,15 +3983,23 @@ $L$SEH_begin_ecp_nistz256_mul_mont: mov rdx,r8 + mov ecx,0x80100 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] $L$mul_mont: push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + +$L$mul_body: cmp ecx,0x80100 je NEAR $L$mul_montx mov rbx,rdx @@ -2841,15 +4024,25 @@ $L$mul_montx: call __ecp_nistz256_mul_montx $L$mul_mont_done: - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$mul_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_mul_mont: @@ -3087,14 +4280,22 @@ $L$SEH_begin_ecp_nistz256_sqr_mont: mov rsi,rdx + mov ecx,0x80100 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + +$L$sqr_body: cmp ecx,0x80100 je NEAR $L$sqr_montx mov rax,QWORD[rsi] @@ -3115,15 +4316,25 @@ $L$sqr_montx: call __ecp_nistz256_sqr_montx $L$sqr_mont_done: - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + mov r15,QWORD[rsp] + + mov r14,QWORD[8+rsp] + + mov r13,QWORD[16+rsp] + + mov r12,QWORD[24+rsp] + + mov rbx,QWORD[32+rsp] + + mov rbp,QWORD[40+rsp] + + lea rsp,[48+rsp] + +$L$sqr_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + 
$L$SEH_end_ecp_nistz256_sqr_mont: @@ -3599,9 +4810,13 @@ $L$SEH_begin_ecp_nistz256_from_mont: mov rsi,rdx + push r12 + push r13 +$L$from_body: + mov rax,QWORD[rsi] mov r13,QWORD[(($L$poly+24))] mov r9,QWORD[8+rsi] @@ -3681,11 +4896,17 @@ $L$SEH_begin_ecp_nistz256_from_mont: mov QWORD[16+rdi],r10 mov QWORD[24+rdi],r11 - pop r13 - pop r12 + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + lea rsp,[16+rsp] + +$L$from_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_from_mont: @@ -3794,8 +5015,8 @@ $L$select_loop_sse_w5: movaps xmm14,XMMWORD[128+rsp] movaps xmm15,XMMWORD[144+rsp] lea rsp,[168+rsp] -$L$SEH_end_ecp_nistz256_gather_w5: DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_gather_w5: @@ -3889,8 +5110,8 @@ $L$select_loop_sse_w7: movaps xmm14,XMMWORD[128+rsp] movaps xmm15,XMMWORD[144+rsp] lea rsp,[168+rsp] -$L$SEH_end_ecp_nistz256_gather_w7: DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_gather_w7: @@ -3900,6 +5121,7 @@ ecp_nistz256_avx2_gather_w5: $L$avx2_gather_w5: vzeroupper lea rax,[((-136))+rsp] + mov r11,rsp $L$SEH_begin_ecp_nistz256_avx2_gather_w5: DB 0x48,0x8d,0x60,0xe0 DB 0xc5,0xf8,0x29,0x70,0xe0 @@ -3973,9 +5195,9 @@ $L$select_loop_avx2_w5: movaps xmm13,XMMWORD[112+rsp] movaps xmm14,XMMWORD[128+rsp] movaps xmm15,XMMWORD[144+rsp] - lea rsp,[168+rsp] -$L$SEH_end_ecp_nistz256_avx2_gather_w5: + lea rsp,[r11] DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_avx2_gather_w5: @@ -3986,6 +5208,7 @@ ALIGN 32 ecp_nistz256_avx2_gather_w7: $L$avx2_gather_w7: vzeroupper + mov r11,rsp lea rax,[((-136))+rsp] $L$SEH_begin_ecp_nistz256_avx2_gather_w7: DB 0x48,0x8d,0x60,0xe0 @@ -4075,9 +5298,9 @@ $L$select_loop_avx2_w7: movaps xmm13,XMMWORD[112+rsp] movaps xmm14,XMMWORD[128+rsp] movaps xmm15,XMMWORD[144+rsp] - lea rsp,[168+rsp] -$L$SEH_end_ecp_nistz256_avx2_gather_w7: + lea rsp,[r11] DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_avx2_gather_w7: ALIGN 32 @@ -4212,18 +5435,27 @@ 
$L$SEH_begin_ecp_nistz256_point_double: mov rsi,rdx + mov ecx,0x80100 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] cmp ecx,0x80100 je NEAR $L$point_doublex push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*5+8 +$L$point_doubleq_body: + $L$point_double_shortcutq: movdqu xmm0,XMMWORD[rsi] mov rbx,rsi @@ -4405,16 +5637,27 @@ DB 102,72,15,126,203 DB 102,72,15,126,207 call __ecp_nistz256_sub_fromq - add rsp,32*5+8 - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + lea rsi,[((160+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_doubleq_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_point_double: global ecp_nistz256_point_add @@ -4429,18 +5672,27 @@ $L$SEH_begin_ecp_nistz256_point_add: mov rdx,r8 + mov ecx,0x80100 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] cmp ecx,0x80100 je NEAR $L$point_addx push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*18+8 +$L$point_addq_body: + movdqu xmm0,XMMWORD[rsi] movdqu xmm1,XMMWORD[16+rsi] movdqu xmm2,XMMWORD[32+rsi] @@ -4816,16 +6068,27 @@ DB 102,72,15,126,199 movdqu XMMWORD[48+rdi],xmm3 $L$add_doneq: - add rsp,32*18+8 - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + lea rsi,[((576+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_addq_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_point_add: global ecp_nistz256_point_add_affine @@ -4840,18 +6103,27 @@ $L$SEH_begin_ecp_nistz256_point_add_affine: mov rdx,r8 + mov ecx,0x80100 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] cmp ecx,0x80100 je NEAR 
$L$point_add_affinex push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*15+8 +$L$add_affineq_body: + movdqu xmm0,XMMWORD[rsi] mov rbx,rdx movdqu xmm1,XMMWORD[16+rsi] @@ -5133,16 +6405,27 @@ DB 102,72,15,126,199 movdqu XMMWORD[32+rdi],xmm2 movdqu XMMWORD[48+rdi],xmm3 - add rsp,32*15+8 - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + lea rsi,[((480+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$add_affineq_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_point_add_affine: ALIGN 32 @@ -5282,15 +6565,24 @@ $L$SEH_begin_ecp_nistz256_point_doublex: mov rsi,rdx + $L$point_doublex: push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*5+8 +$L$point_doublex_body: + $L$point_double_shortcutx: movdqu xmm0,XMMWORD[rsi] mov rbx,rsi @@ -5472,16 +6764,27 @@ DB 102,72,15,126,203 DB 102,72,15,126,207 call __ecp_nistz256_sub_fromx - add rsp,32*5+8 - pop r15 - pop r14 - pop r13 - pop r12 - pop rbx - pop rbp + lea rsi,[((160+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_doublex_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_point_doublex: ALIGN 32 @@ -5495,15 +6798,24 @@ $L$SEH_begin_ecp_nistz256_point_addx: mov rdx,r8 + $L$point_addx: push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*18+8 +$L$point_addx_body: + movdqu xmm0,XMMWORD[rsi] movdqu xmm1,XMMWORD[16+rsi] movdqu xmm2,XMMWORD[32+rsi] @@ -5879,16 +7191,27 @@ DB 102,72,15,126,199 movdqu XMMWORD[48+rdi],xmm3 $L$add_donex: - add rsp,32*18+8 - pop r15 - pop r14 - pop 
r13 - pop r12 - pop rbx - pop rbp + lea rsi,[((576+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$point_addx_epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret + $L$SEH_end_ecp_nistz256_point_addx: ALIGN 32 @@ -5902,15 +7225,24 @@ $L$SEH_begin_ecp_nistz256_point_add_affinex: mov rdx,r8 + $L$point_add_affinex: push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*15+8 +$L$add_affinex_body: + movdqu xmm0,XMMWORD[rsi] mov rbx,rdx movdqu xmm1,XMMWORD[16+rsi] @@ -6192,14 +7524,375 @@ DB 102,72,15,126,199 movdqu XMMWORD[32+rdi],xmm2 movdqu XMMWORD[48+rdi],xmm3 - add rsp,32*15+8 + lea rsi,[((480+56))+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbx,QWORD[((-16))+rsi] + + mov rbp,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$add_affinex_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_ecp_nistz256_point_add_affinex: +EXTERN __imp_RtlVirtualUnwind + + +ALIGN 16 +short_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rax,[16+rax] + + mov r12,QWORD[((-8))+rax] + mov r13,QWORD[((-16))+rax] + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +full_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov 
rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov r10d,DWORD[8+r11] + lea rax,[r10*1+rax] + + mov rbp,QWORD[((-8))+rax] + mov rbx,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq pop r15 pop r14 pop r13 pop r12 - pop rbx pop rbp - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] + pop rbx + pop rdi + pop rsi DB 0F3h,0C3h ;repret -$L$SEH_end_ecp_nistz256_point_add_affinex: + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_ecp_nistz256_mul_by_2 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_mul_by_2 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_mul_by_2 wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_div_by_2 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_div_by_2 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_div_by_2 wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_mul_by_3 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_mul_by_3 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_mul_by_3 wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_add wrt 
..imagebase + DD $L$SEH_end_ecp_nistz256_add wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_add wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_sub wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_sub wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_sub wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_neg wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_neg wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_neg wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_to_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_to_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_to_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_from_mont wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_from_mont wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_from_mont wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_gather_w5 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_gather_w5 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase + + DD 
$L$SEH_begin_ecp_nistz256_gather_w7 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_gather_w7 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_avx2_gather_w5 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_avx2_gather_w5 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_avx2_gather_w7 wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_avx2_gather_w7 wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_double wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase + DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase + + DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase + DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase + DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ecp_nistz256_mul_by_2: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$mul_by_2_body wrt ..imagebase,$L$mul_by_2_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_div_by_2: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$div_by_2_body wrt ..imagebase,$L$div_by_2_epilogue wrt ..imagebase 
+$L$SEH_info_ecp_nistz256_mul_by_3: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$mul_by_3_body wrt ..imagebase,$L$mul_by_3_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_add: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$add_body wrt ..imagebase,$L$add_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_sub: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$sub_body wrt ..imagebase,$L$sub_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_neg: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_ord_mul_mont: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_sqr_mont: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_mul_montx: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_ord_sqr_montx: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_to_mont: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_mul_mont: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_sqr_mont: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase + DD 48,0 +$L$SEH_info_ecp_nistz256_from_mont: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase +$L$SEH_info_ecp_nistz256_gather_wX: +DB 0x01,0x33,0x16,0x00 +DB 0x33,0xf8,0x09,0x00 +DB 0x2e,0xe8,0x08,0x00 
+DB 0x29,0xd8,0x07,0x00 +DB 0x24,0xc8,0x06,0x00 +DB 0x1f,0xb8,0x05,0x00 +DB 0x1a,0xa8,0x04,0x00 +DB 0x15,0x98,0x03,0x00 +DB 0x10,0x88,0x02,0x00 +DB 0x0c,0x78,0x01,0x00 +DB 0x08,0x68,0x00,0x00 +DB 0x04,0x01,0x15,0x00 +ALIGN 8 +$L$SEH_info_ecp_nistz256_avx2_gather_wX: +DB 0x01,0x36,0x17,0x0b +DB 0x36,0xf8,0x09,0x00 +DB 0x31,0xe8,0x08,0x00 +DB 0x2c,0xd8,0x07,0x00 +DB 0x27,0xc8,0x06,0x00 +DB 0x22,0xb8,0x05,0x00 +DB 0x1d,0xa8,0x04,0x00 +DB 0x18,0x98,0x03,0x00 +DB 0x13,0x88,0x02,0x00 +DB 0x0e,0x78,0x01,0x00 +DB 0x09,0x68,0x00,0x00 +DB 0x04,0x01,0x15,0x00 +DB 0x00,0xb3,0x00,0x00 +ALIGN 8 +$L$SEH_info_ecp_nistz256_point_double: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase + DD 32*5+56,0 +$L$SEH_info_ecp_nistz256_point_add: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase + DD 32*18+56,0 +$L$SEH_info_ecp_nistz256_point_add_affine: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase + DD 32*15+56,0 +ALIGN 8 +$L$SEH_info_ecp_nistz256_point_doublex: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase + DD 32*5+56,0 +$L$SEH_info_ecp_nistz256_point_addx: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase + DD 32*18+56,0 +$L$SEH_info_ecp_nistz256_point_add_affinex: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase + DD 32*15+56,0 diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm new file mode 100644 index 0000000000..84d55134ac --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm @@ -0,0 
+1,1054 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global x25519_fe51_mul + +ALIGN 32 +x25519_fe51_mul: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe51_mul: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-40))+rsp] + +$L$fe51_mul_body: + + mov rax,QWORD[rsi] + mov r11,QWORD[rdx] + mov r12,QWORD[8+rdx] + mov r13,QWORD[16+rdx] + mov rbp,QWORD[24+rdx] + mov r14,QWORD[32+rdx] + + mov QWORD[32+rsp],rdi + mov rdi,rax + mul r11 + mov QWORD[rsp],r11 + mov rbx,rax + mov rax,rdi + mov rcx,rdx + mul r12 + mov QWORD[8+rsp],r12 + mov r8,rax + mov rax,rdi + lea r15,[r14*8+r14] + mov r9,rdx + mul r13 + mov QWORD[16+rsp],r13 + mov r10,rax + mov rax,rdi + lea rdi,[r15*2+r14] + mov r11,rdx + mul rbp + mov r12,rax + mov rax,QWORD[rsi] + mov r13,rdx + mul r14 + mov r14,rax + mov rax,QWORD[8+rsi] + mov r15,rdx + + mul rdi + add rbx,rax + mov rax,QWORD[16+rsi] + adc rcx,rdx + mul rdi + add r8,rax + mov rax,QWORD[24+rsi] + adc r9,rdx + mul rdi + add r10,rax + mov rax,QWORD[32+rsi] + adc r11,rdx + mul rdi + imul rdi,rbp,19 + add r12,rax + mov rax,QWORD[8+rsi] + adc r13,rdx + mul rbp + mov rbp,QWORD[16+rsp] + add r14,rax + mov rax,QWORD[16+rsi] + adc r15,rdx + + mul rdi + add rbx,rax + mov rax,QWORD[24+rsi] + adc rcx,rdx + mul rdi + add r8,rax + mov rax,QWORD[32+rsi] + adc r9,rdx + mul rdi + imul rdi,rbp,19 + add r10,rax + mov rax,QWORD[8+rsi] + adc r11,rdx + mul rbp + add r12,rax + mov rax,QWORD[16+rsi] + adc r13,rdx + mul rbp + mov rbp,QWORD[8+rsp] + add r14,rax + mov rax,QWORD[24+rsi] + adc r15,rdx + + mul rdi + add rbx,rax + mov rax,QWORD[32+rsi] + adc rcx,rdx + mul rdi + add r8,rax + mov rax,QWORD[8+rsi] + adc r9,rdx + mul rbp + imul rdi,rbp,19 + add r10,rax + mov rax,QWORD[16+rsi] + adc r11,rdx + mul rbp + add r12,rax + mov rax,QWORD[24+rsi] + adc r13,rdx + mul rbp + mov 
rbp,QWORD[rsp] + add r14,rax + mov rax,QWORD[32+rsi] + adc r15,rdx + + mul rdi + add rbx,rax + mov rax,QWORD[8+rsi] + adc rcx,rdx + mul rbp + add r8,rax + mov rax,QWORD[16+rsi] + adc r9,rdx + mul rbp + add r10,rax + mov rax,QWORD[24+rsi] + adc r11,rdx + mul rbp + add r12,rax + mov rax,QWORD[32+rsi] + adc r13,rdx + mul rbp + add r14,rax + adc r15,rdx + + mov rdi,QWORD[32+rsp] + jmp NEAR $L$reduce51 +$L$fe51_mul_epilogue: + +$L$SEH_end_x25519_fe51_mul: + +global x25519_fe51_sqr + +ALIGN 32 +x25519_fe51_sqr: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe51_sqr: + mov rdi,rcx + mov rsi,rdx + + + + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-40))+rsp] + +$L$fe51_sqr_body: + + mov rax,QWORD[rsi] + mov r15,QWORD[16+rsi] + mov rbp,QWORD[32+rsi] + + mov QWORD[32+rsp],rdi + lea r14,[rax*1+rax] + mul rax + mov rbx,rax + mov rax,QWORD[8+rsi] + mov rcx,rdx + mul r14 + mov r8,rax + mov rax,r15 + mov QWORD[rsp],r15 + mov r9,rdx + mul r14 + mov r10,rax + mov rax,QWORD[24+rsi] + mov r11,rdx + imul rdi,rbp,19 + mul r14 + mov r12,rax + mov rax,rbp + mov r13,rdx + mul r14 + mov r14,rax + mov rax,rbp + mov r15,rdx + + mul rdi + add r12,rax + mov rax,QWORD[8+rsi] + adc r13,rdx + + mov rsi,QWORD[24+rsi] + lea rbp,[rax*1+rax] + mul rax + add r10,rax + mov rax,QWORD[rsp] + adc r11,rdx + mul rbp + add r12,rax + mov rax,rbp + adc r13,rdx + mul rsi + add r14,rax + mov rax,rbp + adc r15,rdx + imul rbp,rsi,19 + mul rdi + add rbx,rax + lea rax,[rsi*1+rsi] + adc rcx,rdx + + mul rdi + add r10,rax + mov rax,rsi + adc r11,rdx + mul rbp + add r8,rax + mov rax,QWORD[rsp] + adc r9,rdx + + lea rsi,[rax*1+rax] + mul rax + add r14,rax + mov rax,rbp + adc r15,rdx + mul rsi + add rbx,rax + mov rax,rsi + adc rcx,rdx + mul rdi + add r8,rax + adc r9,rdx + + mov rdi,QWORD[32+rsp] + jmp NEAR $L$reduce51 + +ALIGN 32 +$L$reduce51: + mov rbp,0x7ffffffffffff + + mov rdx,r10 + shr r10,51 + shl r11,13 + and rdx,rbp + or 
r11,r10 + add r12,r11 + adc r13,0 + + mov rax,rbx + shr rbx,51 + shl rcx,13 + and rax,rbp + or rcx,rbx + add r8,rcx + adc r9,0 + + mov rbx,r12 + shr r12,51 + shl r13,13 + and rbx,rbp + or r13,r12 + add r14,r13 + adc r15,0 + + mov rcx,r8 + shr r8,51 + shl r9,13 + and rcx,rbp + or r9,r8 + add rdx,r9 + + mov r10,r14 + shr r14,51 + shl r15,13 + and r10,rbp + or r15,r14 + + lea r14,[r15*8+r15] + lea r15,[r14*2+r15] + add rax,r15 + + mov r8,rdx + and rdx,rbp + shr r8,51 + add rbx,r8 + + mov r9,rax + and rax,rbp + shr r9,51 + add rcx,r9 + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rcx + mov QWORD[16+rdi],rdx + mov QWORD[24+rdi],rbx + mov QWORD[32+rdi],r10 + + mov r15,QWORD[40+rsp] + + mov r14,QWORD[48+rsp] + + mov r13,QWORD[56+rsp] + + mov r12,QWORD[64+rsp] + + mov rbx,QWORD[72+rsp] + + mov rbp,QWORD[80+rsp] + + lea rsp,[88+rsp] + +$L$fe51_sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_x25519_fe51_sqr: + +global x25519_fe51_mul121666 + +ALIGN 32 +x25519_fe51_mul121666: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe51_mul121666: + mov rdi,rcx + mov rsi,rdx + + + + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-40))+rsp] + +$L$fe51_mul121666_body: + mov eax,121666 + + mul QWORD[rsi] + mov rbx,rax + mov eax,121666 + mov rcx,rdx + mul QWORD[8+rsi] + mov r8,rax + mov eax,121666 + mov r9,rdx + mul QWORD[16+rsi] + mov r10,rax + mov eax,121666 + mov r11,rdx + mul QWORD[24+rsi] + mov r12,rax + mov eax,121666 + mov r13,rdx + mul QWORD[32+rsi] + mov r14,rax + mov r15,rdx + + jmp NEAR $L$reduce51 +$L$fe51_mul121666_epilogue: + +$L$SEH_end_x25519_fe51_mul121666: +EXTERN OPENSSL_ia32cap_P +global x25519_fe64_eligible + +ALIGN 32 +x25519_fe64_eligible: + mov ecx,DWORD[((OPENSSL_ia32cap_P+8))] + xor eax,eax + and ecx,0x80100 + cmp ecx,0x80100 + cmove eax,ecx + DB 0F3h,0C3h ;repret + + +global x25519_fe64_mul + +ALIGN 32 
+x25519_fe64_mul: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_mul: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + + lea rsp,[((-16))+rsp] + +$L$fe64_mul_body: + + mov rax,rdx + mov rbp,QWORD[rdx] + mov rdx,QWORD[rsi] + mov rcx,QWORD[8+rax] + mov r14,QWORD[16+rax] + mov r15,QWORD[24+rax] + + mulx rax,r8,rbp + xor edi,edi + mulx rbx,r9,rcx + adcx r9,rax + mulx rax,r10,r14 + adcx r10,rbx + mulx r12,r11,r15 + mov rdx,QWORD[8+rsi] + adcx r11,rax + mov QWORD[rsp],r14 + adcx r12,rdi + + mulx rbx,rax,rbp + adox r9,rax + adcx r10,rbx + mulx rbx,rax,rcx + adox r10,rax + adcx r11,rbx + mulx rbx,rax,r14 + adox r11,rax + adcx r12,rbx + mulx r13,rax,r15 + mov rdx,QWORD[16+rsi] + adox r12,rax + adcx r13,rdi + adox r13,rdi + + mulx rbx,rax,rbp + adcx r10,rax + adox r11,rbx + mulx rbx,rax,rcx + adcx r11,rax + adox r12,rbx + mulx rbx,rax,r14 + adcx r12,rax + adox r13,rbx + mulx r14,rax,r15 + mov rdx,QWORD[24+rsi] + adcx r13,rax + adox r14,rdi + adcx r14,rdi + + mulx rbx,rax,rbp + adox r11,rax + adcx r12,rbx + mulx rbx,rax,rcx + adox r12,rax + adcx r13,rbx + mulx rbx,rax,QWORD[rsp] + adox r13,rax + adcx r14,rbx + mulx r15,rax,r15 + mov edx,38 + adox r14,rax + adcx r15,rdi + adox r15,rdi + + jmp NEAR $L$reduce64 +$L$fe64_mul_epilogue: + +$L$SEH_end_x25519_fe64_mul: + +global x25519_fe64_sqr + +ALIGN 32 +x25519_fe64_sqr: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_sqr: + mov rdi,rcx + mov rsi,rdx + + + + push rbp + + push rbx + + push r12 + + push r13 + + push r14 + + push r15 + + push rdi + + lea rsp,[((-16))+rsp] + +$L$fe64_sqr_body: + + mov rdx,QWORD[rsi] + mov rcx,QWORD[8+rsi] + mov rbp,QWORD[16+rsi] + mov rsi,QWORD[24+rsi] + + + mulx r15,r8,rdx + mulx rax,r9,rcx + xor edi,edi + mulx rbx,r10,rbp + adcx r10,rax + mulx r12,r11,rsi + mov rdx,rcx + adcx r11,rbx + adcx r12,rdi + + 
+ mulx rbx,rax,rbp + adox r11,rax + adcx r12,rbx + mulx r13,rax,rsi + mov rdx,rbp + adox r12,rax + adcx r13,rdi + + + mulx r14,rax,rsi + mov rdx,rcx + adox r13,rax + adcx r14,rdi + adox r14,rdi + + adcx r9,r9 + adox r9,r15 + adcx r10,r10 + mulx rbx,rax,rdx + mov rdx,rbp + adcx r11,r11 + adox r10,rax + adcx r12,r12 + adox r11,rbx + mulx rbx,rax,rdx + mov rdx,rsi + adcx r13,r13 + adox r12,rax + adcx r14,r14 + adox r13,rbx + mulx r15,rax,rdx + mov edx,38 + adox r14,rax + adcx r15,rdi + adox r15,rdi + jmp NEAR $L$reduce64 + +ALIGN 32 +$L$reduce64: + mulx rbx,rax,r12 + adcx r8,rax + adox r9,rbx + mulx rbx,rax,r13 + adcx r9,rax + adox r10,rbx + mulx rbx,rax,r14 + adcx r10,rax + adox r11,rbx + mulx r12,rax,r15 + adcx r11,rax + adox r12,rdi + adcx r12,rdi + + mov rdi,QWORD[16+rsp] + imul r12,rdx + + add r8,r12 + adc r9,0 + adc r10,0 + adc r11,0 + + sbb rax,rax + and rax,38 + + add r8,rax + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[rdi],r8 + + mov r15,QWORD[24+rsp] + + mov r14,QWORD[32+rsp] + + mov r13,QWORD[40+rsp] + + mov r12,QWORD[48+rsp] + + mov rbx,QWORD[56+rsp] + + mov rbp,QWORD[64+rsp] + + lea rsp,[72+rsp] + +$L$fe64_sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_x25519_fe64_sqr: + +global x25519_fe64_mul121666 + +ALIGN 32 +x25519_fe64_mul121666: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_mul121666: + mov rdi,rcx + mov rsi,rdx + + +$L$fe64_mul121666_body: + mov edx,121666 + mulx rcx,r8,QWORD[rsi] + mulx rax,r9,QWORD[8+rsi] + add r9,rcx + mulx rcx,r10,QWORD[16+rsi] + adc r10,rax + mulx rax,r11,QWORD[24+rsi] + adc r11,rcx + adc rax,0 + + imul rax,rax,38 + + add r8,rax + adc r9,0 + adc r10,0 + adc r11,0 + + sbb rax,rax + and rax,38 + + add r8,rax + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[rdi],r8 + +$L$fe64_mul121666_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + 
mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_x25519_fe64_mul121666: + +global x25519_fe64_add + +ALIGN 32 +x25519_fe64_add: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_add: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$fe64_add_body: + mov r8,QWORD[rsi] + mov r9,QWORD[8+rsi] + mov r10,QWORD[16+rsi] + mov r11,QWORD[24+rsi] + + add r8,QWORD[rdx] + adc r9,QWORD[8+rdx] + adc r10,QWORD[16+rdx] + adc r11,QWORD[24+rdx] + + sbb rax,rax + and rax,38 + + add r8,rax + adc r9,0 + adc r10,0 + mov QWORD[8+rdi],r9 + adc r11,0 + mov QWORD[16+rdi],r10 + sbb rax,rax + mov QWORD[24+rdi],r11 + and rax,38 + + add r8,rax + mov QWORD[rdi],r8 + +$L$fe64_add_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_x25519_fe64_add: + +global x25519_fe64_sub + +ALIGN 32 +x25519_fe64_sub: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_sub: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$fe64_sub_body: + mov r8,QWORD[rsi] + mov r9,QWORD[8+rsi] + mov r10,QWORD[16+rsi] + mov r11,QWORD[24+rsi] + + sub r8,QWORD[rdx] + sbb r9,QWORD[8+rdx] + sbb r10,QWORD[16+rdx] + sbb r11,QWORD[24+rdx] + + sbb rax,rax + and rax,38 + + sub r8,rax + sbb r9,0 + sbb r10,0 + mov QWORD[8+rdi],r9 + sbb r11,0 + mov QWORD[16+rdi],r10 + sbb rax,rax + mov QWORD[24+rdi],r11 + and rax,38 + + sub r8,rax + mov QWORD[rdi],r8 + +$L$fe64_sub_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_x25519_fe64_sub: + +global x25519_fe64_tobytes + +ALIGN 32 +x25519_fe64_tobytes: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_x25519_fe64_tobytes: + mov rdi,rcx + mov rsi,rdx + + +$L$fe64_to_body: + mov r8,QWORD[rsi] + mov r9,QWORD[8+rsi] + mov r10,QWORD[16+rsi] + mov r11,QWORD[24+rsi] + + + lea rax,[r11*1+r11] + sar r11,63 + shr rax,1 + and r11,19 + add r11,19 + + 
add r8,r11 + adc r9,0 + adc r10,0 + adc rax,0 + + lea r11,[rax*1+rax] + sar rax,63 + shr r11,1 + not rax + and rax,19 + + sub r8,rax + sbb r9,0 + sbb r10,0 + sbb r11,0 + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + +$L$fe64_to_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_x25519_fe64_tobytes: +DB 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101 +DB 115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 +DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind + + +ALIGN 16 +short_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +full_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov r10d,DWORD[8+r11] + lea rax,[r10*1+rax] + + mov rbp,QWORD[((-8))+rax] + mov rbx,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov 
QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_x25519_fe51_mul wrt ..imagebase + DD $L$SEH_end_x25519_fe51_mul wrt ..imagebase + DD $L$SEH_info_x25519_fe51_mul wrt ..imagebase + + DD $L$SEH_begin_x25519_fe51_sqr wrt ..imagebase + DD $L$SEH_end_x25519_fe51_sqr wrt ..imagebase + DD $L$SEH_info_x25519_fe51_sqr wrt ..imagebase + + DD $L$SEH_begin_x25519_fe51_mul121666 wrt ..imagebase + DD $L$SEH_end_x25519_fe51_mul121666 wrt ..imagebase + DD $L$SEH_info_x25519_fe51_mul121666 wrt ..imagebase + DD $L$SEH_begin_x25519_fe64_mul wrt ..imagebase + DD $L$SEH_end_x25519_fe64_mul wrt ..imagebase + DD $L$SEH_info_x25519_fe64_mul wrt ..imagebase + + DD $L$SEH_begin_x25519_fe64_sqr wrt ..imagebase + DD $L$SEH_end_x25519_fe64_sqr wrt ..imagebase + DD $L$SEH_info_x25519_fe64_sqr wrt ..imagebase + + DD $L$SEH_begin_x25519_fe64_mul121666 wrt ..imagebase + DD $L$SEH_end_x25519_fe64_mul121666 wrt ..imagebase + DD $L$SEH_info_x25519_fe64_mul121666 wrt ..imagebase + + DD $L$SEH_begin_x25519_fe64_add wrt ..imagebase + DD $L$SEH_end_x25519_fe64_add wrt ..imagebase + DD $L$SEH_info_x25519_fe64_add wrt ..imagebase + + DD $L$SEH_begin_x25519_fe64_sub wrt ..imagebase + DD $L$SEH_end_x25519_fe64_sub wrt ..imagebase + DD $L$SEH_info_x25519_fe64_sub wrt ..imagebase + + DD $L$SEH_begin_x25519_fe64_tobytes wrt ..imagebase + DD $L$SEH_end_x25519_fe64_tobytes wrt ..imagebase + DD $L$SEH_info_x25519_fe64_tobytes wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 
+$L$SEH_info_x25519_fe51_mul: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$fe51_mul_body wrt ..imagebase,$L$fe51_mul_epilogue wrt ..imagebase + DD 88,0 +$L$SEH_info_x25519_fe51_sqr: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$fe51_sqr_body wrt ..imagebase,$L$fe51_sqr_epilogue wrt ..imagebase + DD 88,0 +$L$SEH_info_x25519_fe51_mul121666: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$fe51_mul121666_body wrt ..imagebase,$L$fe51_mul121666_epilogue wrt ..imagebase + DD 88,0 +$L$SEH_info_x25519_fe64_mul: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$fe64_mul_body wrt ..imagebase,$L$fe64_mul_epilogue wrt ..imagebase + DD 72,0 +$L$SEH_info_x25519_fe64_sqr: +DB 9,0,0,0 + DD full_handler wrt ..imagebase + DD $L$fe64_sqr_body wrt ..imagebase,$L$fe64_sqr_epilogue wrt ..imagebase + DD 72,0 +$L$SEH_info_x25519_fe64_mul121666: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$fe64_mul121666_body wrt ..imagebase,$L$fe64_mul121666_epilogue wrt ..imagebase +$L$SEH_info_x25519_fe64_add: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$fe64_add_body wrt ..imagebase,$L$fe64_add_epilogue wrt ..imagebase +$L$SEH_info_x25519_fe64_sub: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$fe64_sub_body wrt ..imagebase,$L$fe64_sub_epilogue wrt ..imagebase +$L$SEH_info_x25519_fe64_tobytes: +DB 9,0,0,0 + DD short_handler wrt ..imagebase + DD $L$fe64_to_body wrt ..imagebase,$L$fe64_to_epilogue wrt ..imagebase |