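# bn_mul_mont(rp, ap, bp, np, n0p, num): Montgomery multiplication,
# rp[] = ap[] * bp[] * 2^(-64*num) mod np[].
#
# Arguments per the SysV AMD64 ABI: %rdi=rp, %rsi=ap, %rdx=bp,
# %rcx=np, %r8=&n0 where n0 = -np[0]^-1 mod 2^64, %r9=num
# (modulus length in 64-bit words).
#
# Word-serial reference algorithm implemented below:
#   tp[] = 0
#   for (i = 0; i < num; i++) {
#       m  = (tp[0] + ap[0]*bp[i]) * n0 mod 2^64
#       tp = (tp + ap[]*bp[i] + m*np[]) / 2^64   # exact: low word cancels
#   }
#   rp[] = (tp >= np[]) ? tp - np[] : tp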
.text
.globl bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
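# save callee-saved registers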
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
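# zero-extend num and carve out num+2 qwords below the stack pointer
# for the temporary vector tp[]; align the frame down to a 1024-byte
# boundary and stash the caller's %rsp in tp[num+1] for the epilogue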
movl %r9d,%r9d
leaq 2(%r9),%r10
movq %rsp,%r11
negq %r10
leaq (%rsp,%r10,8),%rsp
andq $-1024,%rsp
movq %r11,8(%rsp,%r9,8)
.Lprologue:
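# mul clobbers %rdx, so keep bp in %r12; fetch the n0 value;
# %r14 is the outer index i, %r15 the inner index j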
movq %rdx,%r12
movq (%r8),%r8
xorq %r14,%r14
xorq %r15,%r15
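# peeled first outer iteration (i=0): start ap[0]*bp[0] and derive the
# reduction word m = ap[0]*bp[0]*n0 mod 2^64 (kept in %rbp)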
movq (%r12),%rbx
movq (%rsi),%rax
mulq %rbx
movq %rax,%r10
movq %rdx,%r11
imulq %r8,%rax
movq %rax,%rbp
mulq (%rcx)
addq %r10,%rax
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
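# .L1st: for j = 1..num-1 compute
# tp[j-1] = low64(ap[j]*bp[0] + m*np[j] + carries); the store at j-1
# is the implicit division by 2^64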
.L1st:
movq (%rsi,%r15,8),%rax
mulq %rbx
addq %r11,%rax
adcq $0,%rdx
movq %rax,%r10
movq (%rcx,%r15,8),%rax
movq %rdx,%r11
mulq %rbp
addq %r13,%rax
leaq 1(%r15),%r15
adcq $0,%rdx
addq %r10,%rax
adcq $0,%rdx
movq %rax,-16(%rsp,%r15,8)
cmpq %r9,%r15
movq %rdx,%r13
jl .L1st
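# propagate the two carry chains into tp[num-1] and tp[num]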
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
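# .Louter: remaining iterations i = 1..num-1 accumulate ap[]*bp[i]
# into tp[] and fold in m = (tp[0] + ap[0]*bp[i])*n0 mod 2^64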
.align 4
.Louter:
xorq %r15,%r15
movq (%r12,%r14,8),%rbx
movq (%rsi),%rax
mulq %rbx
addq (%rsp),%rax
adcq $0,%rdx
movq %rax,%r10
movq %rdx,%r11
imulq %r8,%rax
movq %rax,%rbp
mulq (%rcx,%r15,8)
addq %r10,%rax
movq 8(%rsp),%r10
adcq $0,%rdx
movq %rdx,%r13
leaq 1(%r15),%r15
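# .Linner: for j = 1..num-1 compute
# tp[j-1] = low64(tp[j] + ap[j]*bp[i] + m*np[j] + carries)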
.align 4
.Linner:
movq (%rsi,%r15,8),%rax
mulq %rbx
addq %r11,%rax
adcq $0,%rdx
addq %rax,%r10
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
movq %rdx,%r11
mulq %rbp
addq %r13,%rax
leaq 1(%r15),%r15
adcq $0,%rdx
addq %r10,%rax
adcq $0,%rdx
movq (%rsp,%r15,8),%r10
cmpq %r9,%r15
movq %rax,-16(%rsp,%r15,8)
movq %rdx,%r13
jl .Linner
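# fold the remaining carries and the previous top word tp[num]
# (in %r10) into tp[num-1] and a fresh tp[num]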
xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)
leaq 1(%r14),%r14
cmpq %r9,%r14
jl .Louter
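# the result in tp[] may still exceed np[]; subtract np[] into rp[]
# with a running borrow (the xor below clears CF before the first sbb)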
leaq (%rsp),%rsi
leaq -1(%r9),%r15
movq (%rsi),%rax
xorq %r14,%r14
jmp .Lsub
.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
decq %r15
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14
jge .Lsub
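# %rax = tp[num] - borrow: all-ones if tp < np (keep tp), zero if the
# subtraction was needed (keep rp); build a constant-time select mask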
sbbq $0,%rax
andq %rax,%rsi
notq %rax
movq %rdi,%rcx
andq %rax,%rcx
leaq -1(%r9),%r15
orq %rcx,%rsi
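# .Lcopy: copy the selected vector into rp[] and wipe tp[] with a
# non-secret value (%r14 equals num here)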
.align 16
.Lcopy:
movq (%rsi,%r15,8),%rax
movq %rax,(%rdi,%r15,8)
movq %r14,(%rsp,%r15,8)
decq %r15
jge .Lcopy
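# epilogue: recover the caller's %rsp from tp[num+1], restore the
# callee-saved registers and return 1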
movq 8(%rsp,%r9,8),%rsi
movq $1,%rax
movq (%rsi),%r15
movq 8(%rsi),%r14
movq 16(%rsi),%r13
movq 24(%rsi),%r12
movq 32(%rsi),%rbp
movq 40(%rsi),%rbx
leaq 48(%rsi),%rsp
.Lepilogue:
.byte 0xf3,0xc3	# rep ret
.size bn_mul_mont,.-bn_mul_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.align 16