libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit 742d3430640065250b9b635b917869bdd9fcf7f1
parent 6f5ee4cf72c8ad42a214880a84370b2dbfbefacd
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sun, 14 Jan 2024 11:59:37 +0100

kill whitespace only

Diffstat:
Mxcode/gpuv-montg.cl | 114++++++++++++++++++++++++++++++++++++++++----------------------------------------
1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/xcode/gpuv-montg.cl b/xcode/gpuv-montg.cl @@ -45,7 +45,7 @@ typedef uint gpu_register; int mult(gpu_register p[2], gpu_register x, gpu_register y) { - + p[1] = mul_hi(x,y); p[0] = x * y; @@ -262,7 +262,7 @@ gpu_register subtract(__private gpu_register *w, __private gpu_register *u, __pr gpu_register k; size_t j; - + /* Step S1. Initialise */ k = 0; @@ -277,7 +277,7 @@ gpu_register subtract(__private gpu_register *w, __private gpu_register *u, __pr k = 1; else k = 0; - + w[j] -= v[j]; if (w[j] > MAX_DIGIT - v[j]) k++; @@ -320,28 +320,28 @@ void equal_ll( __private gpu_register *a, __private gpu_register *b, size_t ndi void erase_all( __private gpu_register *a, size_t n) { - + for (int i = 0; i < n; i++) { a[i] = 0; } - + } void shift_right(__private gpu_register *r, int n) { - + for (int i = 0; i < R+1; i++) { - + r[i] = r[i + n]; //r[i + n] = 0; - + } - + } // 1 if r > l ; -1 if r < l; == 0 int compare(__private gpu_register *r, __private gpu_register *l, int n) { - + int x = 0; for (int i = n - 1; i >= 0; i--) { x = r[i] > l[i]; @@ -353,7 +353,7 @@ int compare(__private gpu_register *r, __private gpu_register *l, int n) { } int compare_g(__private gpu_register *r, __global gpu_register *l, int n) { - + int x = 0; for (int i = n - 1; i >= 0; i--) { x = r[i] > l[i]; @@ -365,9 +365,9 @@ int compare_g(__private gpu_register *r, __global gpu_register *l, int n) { } int testbit(gpu_register e, int i) { - + return (e & (0x1 << (gpu_register)i) ) > 0 ? 1 : 0; - + } void montMul( __private gpu_register *ret, @@ -375,24 +375,24 @@ void montMul( __private gpu_register *ret, __global gpu_register *ni, __global gpu_register *n, __private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3 ) { - + multiply(tmp_1,a,b,R); multiply(tmp_2,tmp_1,ni,R); multiply(tmp_3,tmp_2,n,R); - + add(tmp_2,tmp_1,tmp_3,R*2+1); - + shift_right(tmp_2, R); - + erase_all(tmp_3, R+1); equal_lg(tmp_3, n, R); - + if (compare(tmp_2, tmp_3, R+1) >= 0) { subtract(ret, tmp_2, tmp_3, R+1); } else { equal_ll(ret, tmp_2, R); } - + } void montSqr( __private gpu_register *ret, @@ -400,26 +400,26 @@ void montSqr( __private gpu_register *ret, __global gpu_register *ni, __global gpu_register *n, __private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3 ) { - + square(tmp_1,a,R); multiply(tmp_2,tmp_1,ni,R); multiply(tmp_3,tmp_2,n,R); - + add(tmp_2,tmp_1,tmp_3,R*2+1); - + shift_right(tmp_2, R); - + erase_all(tmp_3, R+1); equal_lg(tmp_3, n, R); - + if (compare(tmp_2, tmp_3, R+1) >= 0) { subtract(ret, tmp_2, tmp_3, R+1); } else { equal_ll(ret, tmp_2, R); } - - - + + + } void montFinish( __private gpu_register *ret, @@ -427,26 +427,26 @@ void montFinish( __private gpu_register *ret, __global gpu_register *ni, __global gpu_register *n, __private gpu_register *tmp_1, __private gpu_register *tmp_2, __private gpu_register *tmp_3 ) { - + erase_all(tmp_1, R*2+1); equal_ll(tmp_1,a,R); - + multiply(tmp_2,tmp_1,ni,R); multiply(tmp_3,tmp_2,n,R); - + add(tmp_2,tmp_1,tmp_3,R*2+1); - + shift_right(tmp_2, R); - + erase_all(tmp_3, R+1); equal_lg(tmp_3, n, R); - + if (compare(tmp_2, tmp_3, R+1) >= 0) { subtract(ret, tmp_2, tmp_3, R+1); } else { equal_ll(ret, tmp_2, R); } - + } @@ -457,55 +457,55 @@ __kernel void mont(__global gpu_register *x, __global gpu_register *m, __global uint *pks, __global uint *out // 32 bit output ) { - + __private gpu_register res_local[R]; __private gpu_register x_local[R]; __private gpu_register tmp_1_local[R * 2 + 1]; __private gpu_register tmp_2_local[R * 2 + 1]; __private gpu_register tmp_3_local[R * 2 + 1]; - + for (int z = 0; z < R*2+1; z++) { tmp_1_local[z] = 0; tmp_2_local[z] = 0; tmp_3_local[z] = 0; } - + size_t i = get_global_id(0); - + uint pk = pks[i]; - - - + + + ulong pk_i = R * pk; ulong s_i = R * i; - + // printf((char __constant *)"%lu\n", ni[pk_i]); - + int k = ceil(log2((float)exp[pk] + (float)1)); - + equal_lg(x_local,&x[s_i],R); - + for (int j = k - 1; j >= 0; j--) { - + montSqr(res_local, x_local, &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local); - + if (testbit(exp[pk], j)) { - + equal_ll(x_local, res_local, R); montMul(res_local, x_local, &m[s_i], &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local); - + } - + equal_ll(x_local, res_local, R); } - + montFinish(res_local, x_local, &ni[pk_i], &n[pk_i], tmp_1_local, tmp_2_local, tmp_3_local); - + if (compare_g(res_local, &cmp[s_i], R) == 0) { - + uint out_offset = i / (sizeof(uint) * 8); // 32 bit - + uint mv = 1 << i; - + atomic_or(&out[out_offset], mv); - - + + } }