commit 742d3430640065250b9b635b917869bdd9fcf7f1
parent 6f5ee4cf72c8ad42a214880a84370b2dbfbefacd
Author: Christian Grothoff <christian@grothoff.org>
Date: Sun, 14 Jan 2024 11:59:37 +0100
kill whitespace only
Diffstat:
| M | xcode/gpuv-montg.cl | | | 114 | ++++++++++++++++++++++++++++++++++++++++---------------------------------------- |
1 file changed, 57 insertions(+), 57 deletions(-)
diff --git a/xcode/gpuv-montg.cl b/xcode/gpuv-montg.cl
@@ -45,7 +45,7 @@ typedef uint gpu_register;
int mult(gpu_register p[2], gpu_register x, gpu_register y)
{
-
+
p[1] = mul_hi(x,y);
p[0] = x * y;
@@ -262,7 +262,7 @@ gpu_register subtract(__private gpu_register *w, __private gpu_register *u, __pr
gpu_register k;
size_t j;
-
+
/* Step S1. Initialise */
k = 0;
@@ -277,7 +277,7 @@ gpu_register subtract(__private gpu_register *w, __private gpu_register *u, __pr
k = 1;
else
k = 0;
-
+
w[j] -= v[j];
if (w[j] > MAX_DIGIT - v[j])
k++;
@@ -320,28 +320,28 @@ void equal_ll( __private gpu_register *a, __private gpu_register *b, size_t ndi
void erase_all( __private gpu_register *a, size_t n)
{
-
+
for (int i = 0; i < n; i++)
{
a[i] = 0;
}
-
+
}
void shift_right(__private gpu_register *r, int n) {
-
+
for (int i = 0; i < R+1; i++) {
-
+
r[i] = r[i + n];
//r[i + n] = 0;
-
+
}
-
+
}
// 1 if r > l ; -1 if r < l; == 0
int compare(__private gpu_register *r, __private gpu_register *l, int n) {
-
+
int x = 0;
for (int i = n - 1; i >= 0; i--) {
x = r[i] > l[i];
@@ -353,7 +353,7 @@ int compare(__private gpu_register *r, __private gpu_register *l, int n) {
}
int compare_g(__private gpu_register *r, __global gpu_register *l, int n) {
-
+
int x = 0;
for (int i = n - 1; i >= 0; i--) {
x = r[i] > l[i];
@@ -365,9 +365,9 @@ int compare_g(__private gpu_register *r, __global gpu_register *l, int n) {
}
int testbit(gpu_register e, int i) {
-
+
return (e & (0x1 << (gpu_register)i) ) > 0 ? 1 : 0;
-
+
}
void montMul( __private gpu_register *ret,
@@ -375,24 +375,24 @@ void montMul( __private gpu_register *ret,
__global gpu_register *ni, __global gpu_register *n,
__private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3
) {
-
+
multiply(tmp_1,a,b,R);
multiply(tmp_2,tmp_1,ni,R);
multiply(tmp_3,tmp_2,n,R);
-
+
add(tmp_2,tmp_1,tmp_3,R*2+1);
-
+
shift_right(tmp_2, R);
-
+
erase_all(tmp_3, R+1);
equal_lg(tmp_3, n, R);
-
+
if (compare(tmp_2, tmp_3, R+1) >= 0) {
subtract(ret, tmp_2, tmp_3, R+1);
} else {
equal_ll(ret, tmp_2, R);
}
-
+
}
void montSqr( __private gpu_register *ret,
@@ -400,26 +400,26 @@ void montSqr( __private gpu_register *ret,
__global gpu_register *ni, __global gpu_register *n,
__private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3
) {
-
+
square(tmp_1,a,R);
multiply(tmp_2,tmp_1,ni,R);
multiply(tmp_3,tmp_2,n,R);
-
+
add(tmp_2,tmp_1,tmp_3,R*2+1);
-
+
shift_right(tmp_2, R);
-
+
erase_all(tmp_3, R+1);
equal_lg(tmp_3, n, R);
-
+
if (compare(tmp_2, tmp_3, R+1) >= 0) {
subtract(ret, tmp_2, tmp_3, R+1);
} else {
equal_ll(ret, tmp_2, R);
}
-
-
-
+
+
+
}
void montFinish( __private gpu_register *ret,
@@ -427,26 +427,26 @@ void montFinish( __private gpu_register *ret,
__global gpu_register *ni, __global gpu_register *n,
__private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3
) {
-
+
erase_all(tmp_1, R*2+1);
equal_ll(tmp_1,a,R);
-
+
multiply(tmp_2,tmp_1,ni,R);
multiply(tmp_3,tmp_2,n,R);
-
+
add(tmp_2,tmp_1,tmp_3,R*2+1);
-
+
shift_right(tmp_2, R);
-
+
erase_all(tmp_3, R+1);
equal_lg(tmp_3, n, R);
-
+
if (compare(tmp_2, tmp_3, R+1) >= 0) {
subtract(ret, tmp_2, tmp_3, R+1);
} else {
equal_ll(ret, tmp_2, R);
}
-
+
}
@@ -457,55 +457,55 @@ __kernel void mont(__global gpu_register *x, __global gpu_register *m,
__global uint *pks, __global uint *out // 32 bit output
)
{
-
+
__private gpu_register res_local[R];
__private gpu_register x_local[R];
__private gpu_register tmp_1_local[R * 2 + 1];
__private gpu_register tmp_2_local[R * 2 + 1];
__private gpu_register tmp_3_local[R * 2 + 1];
-
+
for (int z = 0; z < R*2+1; z++) { tmp_1_local[z] = 0; tmp_2_local[z] = 0; tmp_3_local[z] = 0; }
-
+
size_t i = get_global_id(0);
-
+
uint pk = pks[i];
-
-
-
+
+
+
ulong pk_i = R * pk;
ulong s_i = R * i;
-
+
// printf((char __constant *)"%lu\n", ni[pk_i]);
-
+
int k = ceil(log2((float)exp[pk] + (float)1));
-
+
equal_lg(x_local,&x[s_i],R);
-
+
for (int j = k - 1; j >= 0; j--) {
-
+
montSqr(res_local, x_local, &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local);
-
+
if (testbit(exp[pk], j)) {
-
+
equal_ll(x_local, res_local, R);
montMul(res_local, x_local, &m[s_i], &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local);
-
+
}
-
+
equal_ll(x_local, res_local, R);
}
-
+
montFinish(res_local, x_local, &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local);
-
+
if (compare_g(res_local, &cmp[s_i], R) == 0)
{
-
+
uint out_offset = i / (sizeof(uint) * 8); // 32 bit
-
+
uint mv = 1 << i;
-
+
atomic_or(&out[out_offset], mv);
-
-
+
+
}
}