libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit e7b32caad0c0abf324ccd1469062046d7b2f2a3d
parent 8c195299c2dbac1ea152aaa5b61a769116743405
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Fri, 24 Nov 2023 13:49:10 +0100

Extend montgomery.cl

Not tested yet

Diffstat:
Msource/lib-gpu-verify.c | 2+-
Mxcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate | 0
Mxcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist | 4++--
Mxcode/montgomery.cl | 600+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
4 files changed, 594 insertions(+), 12 deletions(-)

diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c @@ -35,7 +35,7 @@ int main(int argc, char** argv) //opencl_tests(); - rsa_tests(); + //rsa_tests(); // montgomery_test(); diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate Binary files differ. diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -2888,8 +2888,8 @@ filePath = "montgomery.cl" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1662" - endingLineNumber = "1662" + startingLineNumber = "1664" + endingLineNumber = "1664" landmarkName = "mpz_sizeinbase()" landmarkType = "9"> </BreakpointContent> diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl @@ -213,6 +213,7 @@ typedef struct typedef __mpz_struct mpz_t[1]; typedef __mpz_struct *mpz_ptr; + typedef const __mpz_struct *mpz_srcptr; struct gmp_div_inverse @@ -292,6 +293,7 @@ mpz_set (mpz_t r, const mpz_t x) } } + void mpz_set_ui (mpz_t r, unsigned long int x) { @@ -1749,6 +1751,8 @@ mpz_abs (mpz_t r, const mpz_t u) mpz_set (r, u); r->_mp_size = GMP_ABS (r->_mp_size); } + + mp_bitcnt_t mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) { @@ -1759,6 +1763,42 @@ mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) i, ptr, i, 0); } +mp_bitcnt_t +mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit) +{ + mp_ptr up; + mp_size_t us, un, i; + mp_limb_t limb, ux; + + us = u->_mp_size; + un = GMP_ABS (us); + i = starting_bit / GMP_LIMB_BITS; + + /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit + for u<0. Notice this test picks up any u==0 too. */ + if (i >= un) + return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit); + + up = u->_mp_d; + ux = 0; + limb = up[i]; + + if (starting_bit != 0) + { + if (us < 0) + { + ux = mpn_zero_p (up, i); + limb = ~ limb + ux; + ux = - (mp_limb_t) (limb >= ux); + } + + /* Mask to 0 all bits before starting_bit, thus ignoring them. */ + limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); + } + + return mpn_common_scan (limb, i, up, un, ux); +} + mp_bitcnt_t mpz_make_odd (mpz_t r) @@ -2063,15 +2103,557 @@ mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) mpz_clear (t); } -__kernel void montgomery(__global unsigned long* x, __global const unsigned long *s_len, - __global unsigned long* e, __global const unsigned long *e_len, - __global unsigned long* m, __global const unsigned long *n_len, - __global unsigned long *mm, __global const unsigned long *mm_len, - __global unsigned long* valid, - const unsigned int count, - const unsigned int pks - ) { + +// STRING CONVERSION + +unsigned +mpn_base_power_of_two_p (unsigned b) +{ + switch (b) + { + case 2: return 1; + case 4: return 2; + case 8: return 3; + case 16: return 4; + case 32: return 5; + case 64: return 6; + case 128: return 7; + case 256: return 8; + default: return 0; + } +} + +struct mpn_base_info +{ + /* bb is the largest power of the base which fits in one limb, and + exp is the corresponding exponent. */ + unsigned exp; + mp_limb_t bb; +}; + +void +mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) +{ + mp_limb_t m; + mp_limb_t p; + unsigned exp; + + m = GMP_LIMB_MAX / b; + for (exp = 1, p = b; p <= m; exp++) + p *= b; + + info->exp = exp; + info->bb = p; +} + +int isspace(unsigned char c) { + if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v') + return 1; + return 0; +} + +int strlen(__constant char *c) { + + // rather naive implementation – we assume a string is terminated, and is not 0 characters long. + + int i = 0; + while (1) { + if (c[i] == '\0') + return i; + } + return i; +} + +mp_size_t +mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, + unsigned bits) +{ + mp_size_t rn; + mp_limb_t limb; + unsigned shift; + + for (limb = 0, rn = 0, shift = 0; sn-- > 0; ) + { + limb |= (mp_limb_t) sp[sn] << shift; + shift += bits; + if (shift >= GMP_LIMB_BITS) + { + shift -= GMP_LIMB_BITS; + rp[rn++] = limb; + /* Next line is correct also if shift == 0, + bits == 8, and mp_limb_t == unsigned char. */ + limb = (unsigned int) sp[sn] >> (bits - shift); + } + } + if (limb != 0) + rp[rn++] = limb; + else + rn = mpn_normalized_size (rp, rn); + return rn; +} + +mp_size_t +mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, + mp_limb_t b, const struct mpn_base_info *info) +{ + mp_size_t rn; + mp_limb_t w; + unsigned k; + size_t j; + + assert (sn > 0); + + k = 1 + (sn - 1) % info->exp; + + j = 0; + w = sp[j++]; + while (--k != 0) + w = w * b + sp[j++]; + + rp[0] = w; + + for (rn = 1; j < sn;) + { + mp_limb_t cy; + + w = sp[j++]; + for (k = 1; k < info->exp; k++) + w = w * b + sp[j++]; + + cy = mpn_mul_1 (rp, rp, rn, info->bb); + cy += mpn_add_1 (rp, rp, rn, w); + if (cy > 0) + rp[rn++] = cy; + } + assert (j == sn); + + return rn; +} + + +int +mpz_set_str (mpz_t r, __constant char *sp, int base) +{ + unsigned bits, value_of_a; + mp_size_t rn, alloc; + mp_ptr rp; + size_t dn, sn; + int sign; + unsigned char dp[2048]; + + assert (base == 0 || (base >= 2 && base <= 62)); + + while (isspace( (unsigned char) *sp)) + sp++; + + sign = (*sp == '-'); + sp += sign; + + if (base == 0) + { + if (sp[0] == '0') + { + if (sp[1] == 'x' || sp[1] == 'X') + { + base = 16; + sp += 2; + } + else if (sp[1] == 'b' || sp[1] == 'B') + { + base = 2; + sp += 2; + } + else + base = 8; + } + else + base = 10; + } + + if (!*sp) + { + r->_mp_size = 0; + return -1; + } + sn = strlen(sp); + //dp = (unsigned char *) gmp_alloc (sn); + + + value_of_a = (base > 36) ? 36 : 10; + for (dn = 0; *sp; sp++) + { + unsigned digit; + + if (isspace ((unsigned char) *sp)) + continue; + else if (*sp >= '0' && *sp <= '9') + digit = *sp - '0'; + else if (*sp >= 'a' && *sp <= 'z') + digit = *sp - 'a' + value_of_a; + else if (*sp >= 'A' && *sp <= 'Z') + digit = *sp - 'A' + 10; + else + digit = base; /* fail */ + + if (digit >= (unsigned) base) + { + //gmp_free (dp, sn); + r->_mp_size = 0; + return -1; + } + + dp[dn++] = digit; + } + + if (!dn) + { + //gmp_free (dp, sn); + r->_mp_size = 0; + return -1; + } + bits = mpn_base_power_of_two_p (base); + + if (bits > 0) + { + alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; + rp = MPZ_REALLOC (r, alloc); + rn = mpn_set_str_bits (rp, dp, dn, bits); + } + else + { + struct mpn_base_info info; + mpn_get_base_info (&info, base); + alloc = (dn + info.exp - 1) / info.exp; + rp = MPZ_REALLOC (r, alloc); + rn = mpn_set_str_other (rp, dp, dn, base, &info); + /* Normalization, needed for all-zero input. */ + assert (rn > 0); + rn -= rp[rn-1] == 0; + } + assert (rn <= alloc); + //gmp_free (dp, sn); + + r->_mp_size = sign ? - rn : rn; + + return 0; +} + + + +int +mpz_init_set_str (mpz_t r, __constant char *sp, int base) +{ + mpz_init (r); + return mpz_set_str (r, sp, base); +} + + + + + + + + +// Montgomery multiplication + +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x + ); + +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni + ); + +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); + +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); + +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj); + +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); + + + + +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) { + + mpz_t two; // powj == 2^j + + mpz_init_set_ui(two, 2); + + mp_bitcnt_t j = mpz_scan1(m, 0); + + mpz_tdiv_q_2exp(q,m,j); + mpz_mul_2exp(powj,two,j - 1); + + mpz_clear(two); + +} + +// CPU +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x) { + + // MARK: break this up, reduce the amount of temporary variables + + // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) + + // calculate r, which must be larger than the modulo and also a power of 2 + + mpz_t one, oo; // some helper variables + mpz_init_set_si(one,1); + mpz_init_set_si(oo,0); + + size_t len = mpz_sizeinbase(m,2); + + mpz_mul_2exp(r,one,len); + + mpz_set_si(one, 0); + + mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni + + int sgn = mpz_sgn(r_1); + + mpz_abs(r_1, r_1); + mpz_abs(ni, ni); + + if (sgn == -1) { + mpz_sub(ni, r, ni); + mpz_sub(r_1, m, r_1); + } + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(one, r, r_1); + mpz_mul(oo,ni,m); + + mpz_sub(one, one, oo); // oo must be one + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(M, b, r); + mpz_mod(M, M, m); // set M + + mpz_mod(x, r, m); // set x + + mpz_clear(one); + mpz_clear(oo); + +} + +// maybe GPU? +// MARK: n MUST be an odd number +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + mpz_t aa,xx; + + mpz_init_set(aa, M); + mpz_init_set(xx, a); + + int k = (int)mpz_sizeinbase(e,2); + + for (int i = k - 1; i >= 0; i--) { + + mont_product(xx, xx, xx, r, r_1, n, ni); + + if (mpz_tstbit(e, i)) + mont_product(xx, aa, xx, r, r_1, n, ni); + + } + + mpz_set(ret, xx); + + mpz_clear(aa); + mpz_clear(xx); + +} + +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + + mpz_t x,one; + + mpz_init(x); + mpz_init_set_ui(one, 1); + + mont_product(x, xx, one, r, r_1, n, ni); + + mpz_set(ret, x); + + mpz_clear(x); + mpz_clear(one); + +} + + +// GPU +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni + ) { + + mpz_t t,m,u; + + mpz_init(t); + mpz_init(m); + mpz_init(u); + + + + mont_mulmod(t, b, a, r); + + mont_mulmod(m, ni, t, r); + + mpz_t ab,mn; + + mpz_init(ab); + mpz_init(mn); + + mpz_mul(ab, a, b); + mpz_mul(mn, m, n); + + mpz_add(ab, ab, mn); + + unsigned long sz = mpz_sizeinbase(r,2) - 1; + mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division + + if (mpz_cmp(u, n) >= 0) + mpz_sub(u, u, n); + + mpz_set(ret, u); + + mpz_clear(ab); + mpz_clear(mn); + mpz_clear(t); + mpz_clear(m); + mpz_clear(u); + +} + + +// not the fastest... but it does not increase the variable sizes +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) { + + mpz_t aa, bb; + mpz_init_set(aa, a); + mpz_init_set(bb,b); + + mpz_mod(aa, aa, mod); // in case a is bigger + + while (mpz_cmp_ui(bb, 0) > 0) { + if (mpz_odd_p(bb)) { + mpz_add(res, res, aa); + mpz_mod(res, res, mod); + } + + mpz_mul_2exp(aa,aa,1); + mpz_mod(aa, aa, mod); + mpz_tdiv_q_2exp(bb, bb, 1); + } +} + + + +__kernel void montgomery(__constant unsigned long *valid, __constant char *base, __constant char *exponent, __constant char *modulus, __constant char *signature) { + + int radix = 16; + + mpz_t b,e,m, res; + + mpz_init(res); + + mpz_init_set_str(b,base,radix); // M + mpz_init_set_str(e,exponent,radix); + mpz_init_set_str(m,modulus,radix); // n + + mpz_t r, r_1, ni, M, x; + mpz_init(r); // MARK: I think I have to destroy these myself + mpz_init(r_1); + mpz_init(ni); + mpz_init(M); + mpz_init(x); + + mpz_t xx; + mpz_init(xx); + + if (mpz_even_p(m)) { + + mpz_t bb, x1, x2, q, powj; + mpz_init(bb); + mpz_init(x1); + mpz_init(x2); + mpz_init(q); + mpz_init(powj); + + mont_prepare_even_modulus(m, q, powj); + + // q is uneven, so we can use regular modexp + // MARK: we can improve the efficiency here by doing simple reductions + + mpz_mod(bb, b, q); // reductions like this + + mont_prepare(bb, e, q, r, r_1, ni, M, x); + mont_modexp(xx, x, e, M, q, ni, r, r_1); + mont_finish(x1, xx, q, ni, r, r_1); + + + // MARK: we can also reduce and really speed this up as well -> binary method? + mpz_powm(x2, b, e, powj); + + mpz_t y, q_1; + mpz_init(y); + mpz_init(q_1); + + mpz_sub(y, x2, x1); + + mpz_invert(q_1, q, powj); + + mpz_mul(y, y, q_1); + mpz_mod(y, y, powj); + + mpz_addmul(x1, q, y); + + mpz_set(res, x1); + + printf("--\n"); + + + + } else { + + mont_prepare(b, e, m, r, r_1, ni, M, x); + + mont_modexp(xx, x, e, M, m, ni, r, r_1); + + mont_finish(res, xx, m, ni, r, r_1); + + } + - *valid = 1; }