libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit b1f4f32728669766e1e7121856f779a97ef01f50
parent e496a5a3755148a391d018e3174401a8f894ea9b
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Sun, 26 Nov 2023 00:30:04 +0100

Batch verification with montgomery kernel

Diffstat:
M.DS_Store | 0
Dsource/gmp_GPU.c | 2923-------------------------------------------------------------------------------
Dsource/gmp_GPU.h | 27---------------------------
Msource/lib-gpu-verify.c | 62+++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Msource/rsa-test.c | 220++++++++++++++++++++++++++++++++++++-------------------------------------------
Mxcode/.DS_Store | 0
Mxcode/lib-gpu-verify.xcodeproj/project.pbxproj | 10++++------
Mxcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate | 0
Mxcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist | 274+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Mxcode/montgomery.cl | 65++++++++++++++++++++++++-----------------------------------------
10 files changed, 385 insertions(+), 3196 deletions(-)

diff --git a/.DS_Store b/.DS_Store Binary files differ. diff --git a/source/gmp_GPU.c b/source/gmp_GPU.c @@ -1,2923 +0,0 @@ -// -// gmp_GPU.c -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 25.11.2023. -// - -#include "gmp_GPU.h" - -#define MINI_GMP_LIMB_TYPE long - -#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) - -#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) -#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) - -#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) -#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) - -#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) -#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) - -#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) -#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) - -#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) -#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) - -#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) - -#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \ - ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) - - -#define gmp_clz(count, x) do { \ - mp_limb_t __clz_x = (x); \ - unsigned __clz_c = 0; \ - int LOCAL_SHIFT_BITS = 8; \ - if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ - for (; \ - (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ - __clz_c += 8) \ - { __clz_x <<= LOCAL_SHIFT_BITS; } \ - for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ - __clz_x <<= 1; \ - (count) = __clz_c; \ - } while (0) - -#define gmp_umullo_limb(u, v) \ - ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v)) - -#define gmp_umul_ppmm(w1, w0, u, v) \ - do { \ - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ - if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned int __ww = (unsigned int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned long int __ww = (unsigned long int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else { \ - mp_limb_t __x0, __x1, __x2, __x3; \ - unsigned __ul, __vl, __uh, __vh; \ - mp_limb_t __u = (u), __v = (v); \ - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \ - \ - __ul = __u & GMP_LLIMB_MASK; \ - __uh = __u >> (GMP_LIMB_BITS / 2); \ - __vl = __v & GMP_LLIMB_MASK; \ - __vh = __v >> (GMP_LIMB_BITS / 2); \ - \ - __x0 = (mp_limb_t) __ul * __vl; \ - __x1 = (mp_limb_t) __ul * __vh; \ - __x2 = (mp_limb_t) __uh * __vl; \ - __x3 = (mp_limb_t) __uh * __vh; \ - \ - __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ - \ - (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ - (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ - } \ - } while (0) - -#define gmp_assert_nocarry(x) do { \ - mp_limb_t __cy = (x); \ - assert (__cy == 0); \ - (void) (__cy); \ - } while (0) - -#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) + (bl); \ - (sh) = (ah) + (bh) + (__x < (al)); \ - (sl) = __x; \ - } while (0) - -#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) - (bl); \ - (sh) = (ah) - (bh) - ((al) < (bl)); \ - (sl) = __x; \ - } while (0) - - -#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ - do { \ - mp_limb_t _qh, _ql, _r, _mask; \ - gmp_umul_ppmm (_qh, _ql, (nh), (di)); \ - gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ - _r = (nl) - gmp_umullo_limb (_qh, (d)); \ - _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ - _qh += _mask; \ - _r += _mask & (d); \ - if (_r >= (d)) \ - { \ - _r -= (d); \ - _qh++; \ - } \ - \ - (r) = _r; \ - (q) = _qh; \ - } while (0) - -#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ - do { \ - mp_limb_t _q0, _t1, _t0, _mask; \ - gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \ - gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ - \ - /* Compute the two most significant limbs of n - q'd */ \ - (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ - gmp_umul_ppmm (_t1, _t0, (d0), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ - (q)++; \ - \ - /* Conditionally adjust q and the remainders */ \ - _mask = - (mp_limb_t) ((r1) >= _q0); \ - (q) += _mask; \ - gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ - if ((r1) >= (d1)) \ - { \ - if ((r1) > (d1) || (r0) >= (d0)) \ - { \ - (q)++; \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ - } \ - } \ - } while (0) - -#define gmp_ctz(count, x) do { \ - mp_limb_t __ctz_x = (x); \ - unsigned __ctz_c = 0; \ - gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \ - (count) = GMP_LIMB_BITS - 1 - __ctz_c; \ - } while (0) - - -#define MPZ_SRCPTR_SWAP(x, y) \ - do { \ - mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_srcptr_swap__tmp; \ - } while (0) - -#define MP_SIZE_T_SWAP(x, y) \ - do { \ - mp_size_t __mp_size_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_size_t_swap__tmp; \ - } while (0) - -#define MPZ_PTR_SWAP(x, y) \ - do { \ - mpz_ptr __mpz_ptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_ptr_swap__tmp; \ - } while (0) - -#define MP_BITCNT_T_SWAP(x,y) \ - do { \ - mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_bitcnt_t_swap__tmp; \ - } while (0) - - -#define assert(x){if((x)==0){printf("assert reached\n");}} - -typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t; -typedef long mp_size_t; -typedef unsigned long mp_bitcnt_t; - -typedef mp_limb_t *mp_ptr; -typedef const mp_limb_t *mp_srcptr; - -typedef struct -{ - int _mp_alloc; /* Number of *limbs* allocated and pointed - to by the _mp_d field. */ - int _mp_size; /* abs(_mp_size) is the number of limbs the - last field points to. If _mp_size is - negative this is a negative number. */ - //mp_limb_t *_mp_d; /* Pointer to the limbs. */ - - mp_limb_t _mp_d[256]; - -} __mpz_struct; - -typedef __mpz_struct mpz_t[1]; - -typedef __mpz_struct *mpz_ptr; - -typedef const __mpz_struct *mpz_srcptr; - -struct gmp_div_inverse -{ - /* Normalization shift count. */ - unsigned shift; - /* Normalized divisor (d0 unused for mpn_div_qr_1) */ - mp_limb_t d1, d0; - /* Inverse, for 2/1 or 3/2. */ - mp_limb_t di; -}; - - -struct mpn_base_info -{ - /* bb is the largest power of the base which fits in one limb, and - exp is the corresponding exponent. */ - unsigned exp; - mp_limb_t bb; -}; - - -enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; - -void mpz_init (mpz_t r); -void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n); -void mpz_set (mpz_t r, const mpz_t x); -void -mpz_set (mpz_t r, const mpz_t x); -void -mpz_set_ui (mpz_t r, unsigned long int x); -void -mpz_set_si (mpz_t r, signed long int x); -void -mpz_init_set_si (mpz_t r, signed long int x); -void -mpz_init_set (mpz_t r, const mpz_t x); -void -mpz_init2 (mpz_t r, mp_bitcnt_t bits); -void -mpz_init_set_ui (mpz_t r, unsigned long int x); -void -mpz_clear (mpz_t r); -void -gmp_die (const char *msg); - - -mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n); -void -mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b); -void -mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b); -void -mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b); -int -mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un); -unsigned long int -mpz_get_ui (const mpz_t u); -int -mpz_cmpabs_ui (const mpz_t u, unsigned long v); -mp_limb_t -mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); -mp_limb_t -mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); -mp_limb_t -mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_limb_t -mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0); -int -mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode); -void -mpz_mod (mpz_t r, const mpz_t n, const mpz_t d); -void -mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d); - -void -mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0); - -void -mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn); -int -mpz_cmp_ui (const mpz_t u, unsigned long v); -int -mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n); -mp_limb_t -mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); -mp_limb_t -mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); -int -mpz_invert (mpz_t r, const mpz_t u, const mpz_t m); -mp_limb_t -mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); -mp_limb_t -mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); -void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); -mp_limb_t -mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -void -mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv); -void -mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv); -void -mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m); -int -mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_size_t -mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b); -mp_limb_t -mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); -mp_limb_t -mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_size_t -mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b); -void -mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); -mp_limb_t -mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -mp_limb_t -mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -mp_limb_t -mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn); -void -mpz_mul (mpz_t r, const mpz_t u, const mpz_t v); -void -mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n); -void -mpn_zero (mp_ptr rp, mp_size_t n); -void -mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits); -int -mpn_zero_p(mp_srcptr rp, mp_size_t n); -void -mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode); -void -mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt); -int -mpz_cmp (const mpz_t a, const mpz_t b); -void -mpz_add (mpz_t r, const mpz_t a, const mpz_t b); -int -mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index); -mp_bitcnt_t -mpn_limb_size_in_base_2 (mp_limb_t u); -size_t -mpz_sizeinbase (const mpz_t u, int base); -int -mpz_sgn (const mpz_t u); -mp_bitcnt_t -mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux); -mp_bitcnt_t -mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit); -mp_bitcnt_t -mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit); -mp_bitcnt_t -mpz_make_odd (mpz_t r); -void -mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d); -void -mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_setbit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d); -int -mpz_cmpabs (const mpz_t u, const mpz_t v); -void -mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v); -void -mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v); - -unsigned -mpn_base_power_of_two_p (unsigned b); -void -mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b); -int isspace_gpu(unsigned char c); -int strlen_c(const char *c); -mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits); -mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info); -int -mpz_set_str (mpz_t r, const char *sp, int base); -int -mpz_init_set_str (mpz_t r, const char *sp, int base); - -//void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); -////void mpz_add (mpz_t, const mpz_t, const mpz_t); - -void mpz_abs (mpz_t, const mpz_t); - -void mpz_neg (mpz_t, const mpz_t); -void mpz_swap (mpz_t, mpz_t); -//void mpz_mod (mpz_t, const mpz_t, const mpz_t); -// -////int mpz_sgn (const mpz_t); -// -////void mpz_mul (mpz_t, const mpz_t, const mpz_t); -//void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -// -//void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t); -////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t); -// -void mpz_addmul (mpz_t, const mpz_t, const mpz_t); -// -//int mpz_tstbit (const mpz_t, mp_bitcnt_t); -// -//int mpz_cmp_ui (const mpz_t u, unsigned long v); -// -void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn); -// -//mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t); - -#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0) - -#define MPZ_REALLOC(z,n) (z)->_mp_d - -void -mpz_init (mpz_t r) -{ - const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; - - r->_mp_alloc = 0; - r->_mp_size = 0; - - // memset(r->_mp_d, 0, 256); - - // r->_mp_d = (mp_ptr) &dummy_limb; -} - -void -mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - mp_size_t i; - for (i = 0; i < n; i++) - d[i] = s[i]; -} - -void -mpz_set (mpz_t r, const mpz_t x) -{ - /* Allow the NOP r == x */ - if (r != x) - { - mp_size_t n; - mp_ptr rp; - - n = GMP_ABS (x->_mp_size); - rp = MPZ_REALLOC (r, n); - - mpn_copyi (rp, x->_mp_d, n); - r->_mp_size = x->_mp_size; - } -} - - -void -mpz_set_ui (mpz_t r, unsigned long int x) -{ - if (x > 0) - { - r->_mp_size = 1; - MPZ_REALLOC (r, 1)[0] = x; - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - while (x >>= LOCAL_GMP_LIMB_BITS) - { - ++ r->_mp_size; - MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; - } - } - } - else - r->_mp_size = 0; -} - - -void -mpz_neg (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = -r->_mp_size; -} - - -void -mpz_set_si (mpz_t r, signed long int x) -{ - if (x >= 0) - mpz_set_ui (r, x); - else /* (x < 0) */ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); - mpz_neg (r, r); - } - else - { - r->_mp_size = -1; - MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); - } -} - -void -mpz_init_set_si (mpz_t r, signed long int x) -{ - mpz_init (r); - mpz_set_si (r, x); -} - - -void -mpz_init_set (mpz_t r, const mpz_t x) -{ - mpz_init (r); - mpz_set (r, x); -} - -void -mpz_init2 (mpz_t r, mp_bitcnt_t bits) -{ - mp_size_t rn; - - bits -= (bits != 0); /* Round down, except if 0 */ - rn = 1 + bits / GMP_LIMB_BITS; - - r->_mp_alloc = rn; - r->_mp_size = 0; - // r->_mp_d = gmp_alloc_limbs (rn); -} - -void -mpz_init_set_ui (mpz_t r, unsigned long int x) -{ - mpz_init (r); - mpz_set_ui (r, x); -} - -void -mpz_clear (mpz_t r) -{ - //if (r->_mp_alloc) - //gmp_free_limbs (r->_mp_d, r->_mp_alloc); -} - - -void -gmp_die (const char *msg) -{ - //fprintf (stderr, "%s\n", msg); - //abort(); -} - -mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n) -{ - while (n > 0 && xp[n-1] == 0) - --n; - return n; -} - -void -mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_t bb; - mpz_init_set_ui (bb, b); - mpz_add (r, a, bb); - mpz_clear (bb); -} - -void -mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) -{ - mpz_neg (r, b); - mpz_add_ui (r, r, a); -} - - -void -mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_ui_sub (r, b, a); - mpz_neg (r, r); -} - -int -mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) -{ - int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS; - mp_limb_t ulongrem = 0; - - if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0) - ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1; - - return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); -} - -unsigned long int -mpz_get_ui (const mpz_t u) -{ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - unsigned long r = 0; - mp_size_t n = GMP_ABS (u->_mp_size); - n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); - while (--n >= 0) - r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; - return r; - } - - return u->_mp_size == 0 ? 0 : u->_mp_d[0]; -} - -int -mpz_cmpabs_ui (const mpz_t u, unsigned long v) -{ - mp_size_t un = GMP_ABS (u->_mp_size); - - if (! mpn_absfits_ulong_p (u->_mp_d, un)) - return 1; - else - { - unsigned long uu = mpz_get_ui (u); - return GMP_CMP(uu, v); - } -} - -mp_limb_t -mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - - i = 0; - do - { - mp_limb_t a = ap[i]; - /* Carry out */ - mp_limb_t cy = a < b; - rp[i] = a - b; - b = cy; - } - while (++i < n); - - return b; -} - -mp_limb_t -mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b; - a = ap[i]; b = bp[i]; - b += cy; - cy = (b < cy); - cy += (a < b); - rp[i] = a - b; - } - return cy; -} - -mp_limb_t -mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_sub_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - - -mp_limb_t -mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) -{ - mp_limb_t r, m; - - { - mp_limb_t p, ql; - unsigned ul, uh, qh; - - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); - /* For notation, let b denote the half-limb base, so that B = b^2. - Split u1 = b uh + ul. */ - ul = u1 & GMP_LLIMB_MASK; - uh = u1 >> (GMP_LIMB_BITS / 2); - - /* Approximation of the high half of quotient. Differs from the 2/1 - inverse of the half limb uh, since we have already subtracted - u0. */ - qh = (u1 ^ GMP_LIMB_MAX) / uh; - - /* Adjust to get a half-limb 3/2 inverse, i.e., we want - - qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u - = floor( (b (~u) + b-1) / u), - - and the remainder - - r = b (~u) + b-1 - qh (b uh + ul) - = b (~u - qh uh) + b-1 - qh ul - - Subtraction of qh ul may underflow, which implies adjustments. - But by normalization, 2 u >= B > qh ul, so we need to adjust by - at most 2. - */ - - r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; - - p = (mp_limb_t) qh * ul; - /* Adjustment steps taken from udiv_qrnnd_c */ - if (r < p) - { - qh--; - r += u1; - if (r >= u1) /* i.e. we didn't get carry when adding to r */ - if (r < p) - { - qh--; - r += u1; - } - } - r -= p; - - /* Low half of the quotient is - - ql = floor ( (b r + b-1) / u1). - - This is a 3/2 division (on half-limbs), for which qh is a - suitable inverse. */ - - p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; - /* Unlike full-limb 3/2, we can add 1 without overflow. For this to - work, it is essential that ql is a full mp_limb_t. */ - ql = (p >> (GMP_LIMB_BITS / 2)) + 1; - - /* By the 3/2 trick, we don't need the high half limb. */ - r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; - - if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) - { - ql--; - r += u1; - } - m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; - if (r >= u1) - { - m++; - r -= u1; - } - } - - /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a - 3/2 inverse. */ - if (u0 > 0) - { - mp_limb_t th, tl; - r = ~r; - r += u0; - if (r < u0) - { - m--; - if (r >= u1) - { - m--; - r -= u1; - } - r -= u1; - } - gmp_umul_ppmm (th, tl, u0, m); - r += th; - if (r < th) - { - m--; - m -= ((r > u1) | ((r == u1) & (tl > u0))); - } - } - - return m; -} - -int -mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) -{ - mp_size_t ns, ds, nn, dn, qs; - ns = n->_mp_size; - ds = d->_mp_size; - - if (ds == 0) {} - //gmp_die("mpz_div_qr: Divide by zero."); - - if (ns == 0) - { - if (q) - q->_mp_size = 0; - if (r) - r->_mp_size = 0; - return 0; - } - - nn = GMP_ABS (ns); - dn = GMP_ABS (ds); - - qs = ds ^ ns; - - if (nn < dn) - { - if (mode == GMP_DIV_CEIL && qs >= 0) - { - /* q = 1, r = n - d */ - if (r) - mpz_sub (r, n, d); - if (q) - mpz_set_ui (q, 1); - } - else if (mode == GMP_DIV_FLOOR && qs < 0) - { - /* q = -1, r = n + d */ - if (r) - mpz_add (r, n, d); - if (q) - mpz_set_si (q, -1); - } - else - { - /* q = 0, r = d */ - if (r) - mpz_set (r, n); - if (q) - q->_mp_size = 0; - } - return 1; - } - else - { - mp_ptr np, qp; - mp_size_t qn, rn; - mpz_t tq, tr; - - mpz_init_set (tr, n); - np = tr->_mp_d; - - qn = nn - dn + 1; - - if (q) - { - mpz_init2 (tq, qn * GMP_LIMB_BITS); - qp = tq->_mp_d; - } - else - qp = NULL; - - mpn_div_qr (qp, np, nn, d->_mp_d, dn); - - if (qp) - { - qn -= (qp[qn-1] == 0); - - tq->_mp_size = qs < 0 ? -qn : qn; - } - rn = mpn_normalized_size (np, dn); - tr->_mp_size = ns < 0 ? - rn : rn; - - if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) - { - if (q) - mpz_sub_ui (tq, tq, 1); - if (r) - mpz_add (tr, tr, d); - } - else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) - { - if (q) - mpz_add_ui (tq, tq, 1); - if (r) - mpz_sub (tr, tr, d); - } - - if (q) - { - mpz_swap (tq, q); - mpz_clear (tq); - } - if (r) - mpz_swap (tr, r); - - mpz_clear (tr); - - return rn != 0; - } -} - -void -mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) -{ - struct gmp_div_inverse inv; - // mp_ptr tp = NULL; - - mpz_t tp; - - assert (dn > 0); - assert (nn >= dn); - - mpn_div_qr_invert (&inv, dp, dn); - if (dn > 2 && inv.shift > 0) - { - //tp = gmp_alloc_limbs (dn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift)); - dp = tp->_mp_d; - } - mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv); - if (tp) {} - //gmp_free_limbs (tp, dn); -} - -void -mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) -{ - mpz_t t; - mpz_init (t); - mpz_mul (t, u, v); - mpz_add (r, r, t); - mpz_clear (t); -} - -void -mpz_swap (mpz_t u, mpz_t v) -{ - //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); - //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); - - mpz_t temp; - mpz_init(temp); - - *temp = *u; - *u = *v; - *v = *temp; - -} - -void -mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL); -} - -void -mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) -{ - unsigned shift; - - assert (d > 0); - gmp_clz (shift, d); - inv->shift = shift; - inv->d1 = d << shift; - inv->di = mpn_invert_limb (inv->d1); -} - -void -mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0) -{ - unsigned shift; - - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 <<= shift; - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); -} - -void -mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn) -{ - assert (dn > 0); - - if (dn == 1) - mpn_div_qr_1_invert (inv, dp[0]); - else if (dn == 2) - mpn_div_qr_2_invert (inv, dp[1], dp[0]); - else - { - unsigned shift; - mp_limb_t d1, d0; - - d1 = dp[dn-1]; - d0 = dp[dn-2]; - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift)); - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); - } -} - - -int -mpz_cmp_ui (const mpz_t u, unsigned long v) -{ - mp_size_t usize = u->_mp_size; - - if (usize < 0) - return -1; - else - return mpz_cmpabs_ui (u, v); -} - -int -mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - while (--n >= 0) - { - if (ap[n] != bp[n]) - return ap[n] > bp[n] ? 1 : -1; - } - return 0; -} - -mp_limb_t -mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - up += n; - rp += n; - - tnc = GMP_LIMB_BITS - cnt; - low_limb = *--up; - retval = low_limb >> tnc; - high_limb = (low_limb << cnt); - - while (--n != 0) - { - low_limb = *--up; - *--rp = high_limb | (low_limb >> tnc); - high_limb = (low_limb << cnt); - } - *--rp = high_limb; - - return retval; -} - -mp_limb_t -mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - tnc = GMP_LIMB_BITS - cnt; - high_limb = *up++; - retval = (high_limb << tnc); - low_limb = high_limb >> cnt; - - while (--n != 0) - { - high_limb = *up++; - *rp++ = low_limb | (high_limb << tnc); - low_limb = high_limb >> cnt; - } - *rp = low_limb; - - return retval; -} - -int -mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) -{ - mpz_t g, tr; - int invertible; - - if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0) - return 0; - - mpz_init (g); - mpz_init (tr); - - mpz_gcdext (g, tr, NULL, u, m); - invertible = (mpz_cmp_ui (g, 1) == 0); - - if (invertible) - { - if (tr->_mp_size < 0) - { - if (m->_mp_size >= 0) - mpz_add (tr, tr, m); - else - mpz_sub (tr, tr, m); - } - mpz_swap (r, tr); - } - - mpz_clear (g); - mpz_clear (tr); - return invertible; -} - -/* Not matching current public gmp interface, rather corresponding to - the sbpi1_div_* functions. */ -mp_limb_t -mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - mp_limb_t d, di; - mp_limb_t r; - mp_ptr tp = NULL; - mp_size_t tn = 0; - - if (inv->shift > 0) - { - /* Shift, reusing qp area if possible. In-place shift if qp == np. */ - tp = qp; - if (!tp) - { - tn = nn; - - // tp = gmp_alloc_limbs (tn); - } - r = mpn_lshift (tp, np, nn, inv->shift); - np = tp; - } - else - r = 0; - - d = inv->d1; - di = inv->di; - while (--nn >= 0) - { - mp_limb_t q; - - gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); - if (qp) - qp[nn] = q; - } - //if (tn) - //gmp_free_limbs (tp, tn); - - return r >> inv->shift; -} - -mp_limb_t -mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b, r; - a = ap[i]; b = bp[i]; - r = a + cy; - cy = (r < cy); - r += b; - cy += (r < b); - rp[i] = r; - } - return cy; -} - -void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - unsigned shift; - mp_size_t i; - mp_limb_t d1, d0, di, r1, r0; - - assert (nn >= 2); - shift = inv->shift; - d1 = inv->d1; - d0 = inv->d0; - di = inv->di; - - if (shift > 0) - r1 = mpn_lshift (np, np, nn, shift); - else - r1 = 0; - - r0 = np[nn - 1]; - - i = nn - 2; - do - { - mp_limb_t n0, q; - n0 = np[i]; - gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - if (shift > 0) - { - assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); - r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); - r1 >>= shift; - } - - np[1] = r1; - np[0] = r0; -} - -mp_limb_t -mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl - lpl; - cl += lpl > rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -void -mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv) -{ - mp_size_t i; - - mp_limb_t d1, d0; - mp_limb_t cy, cy1; - mp_limb_t q; - - assert (dn > 2); - assert (nn >= dn); - - d1 = dp[dn - 1]; - d0 = dp[dn - 2]; - - assert ((d1 & GMP_LIMB_HIGHBIT) != 0); - /* Iteration variable is the index of the q limb. - * - * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]> - * by <d1, d0, dp[dn-3], ..., dp[0] > - */ - - i = nn - dn; - do - { - mp_limb_t n0 = np[dn-1+i]; - - if (n1 == d1 && n0 == d0) - { - q = GMP_LIMB_MAX; - mpn_submul_1 (np+i, dp, dn, q); - n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */ - } - else - { - gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv); - - cy = mpn_submul_1 (np + i, dp, dn-2, q); - - cy1 = n0 < cy; - n0 = n0 - cy; - cy = n1 < cy1; - n1 = n1 - cy1; - np[dn-2+i] = n0; - - if (cy != 0) - { - n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); - q--; - } - } - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - np[dn - 1] = n1; -} - -void -mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv) -{ - assert (dn > 0); - assert (nn >= dn); - - if (dn == 1) - np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv); - else if (dn == 2) - mpn_div_qr_2_preinv (qp, np, nn, inv); - else - { - mp_limb_t nh; - unsigned shift; - - assert (inv->d1 == dp[dn-1]); - assert (inv->d0 == dp[dn-2]); - assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); - - shift = inv->shift; - if (shift > 0) - nh = mpn_lshift (np, np, nn, shift); - else - nh = 0; - - mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); - - if (shift > 0) - gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); - } -} - -void -mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) -{ - mpz_t tr; - mpz_t base; - mp_size_t en, mn; - mp_srcptr mp; - struct gmp_div_inverse minv; - unsigned shift; - //mp_ptr tp = NULL; - mpz_t tp; - - //mpz_init(tp); - - en = GMP_ABS (e->_mp_size); - mn = GMP_ABS (m->_mp_size); - if (mn == 0) {} - //gmp_die ("mpz_powm: Zero modulo."); - - if (en == 0) - { - mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); - return; - } - - mp = m->_mp_d; - mpn_div_qr_invert (&minv, mp, mn); - shift = minv.shift; - - if (shift > 0) - { - /* To avoid shifts, we do all our reductions, except the final - one, using a *normalized* m. */ - minv.shift = 0; - - // tp = gmp_alloc_limbs (mn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift)); - mp = tp->_mp_d; - } - - mpz_init (base); - - if (e->_mp_size < 0) - { - if (!mpz_invert (base, b, m)) {} - //gmp_die ("mpz_powm: Negative exponent and non-invertible base."); - } - else - { - mp_size_t bn; - mpz_abs (base, b); - - bn = base->_mp_size; - if (bn >= mn) - { - mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv); - bn = mn; - } - - /* We have reduced the absolute value. Now take care of the - sign. Note that we get zero represented non-canonically as - m. */ - if (b->_mp_size < 0) - { - mp_ptr bp = MPZ_REALLOC (base, mn); - gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn)); - bn = mn; - } - base->_mp_size = mpn_normalized_size (base->_mp_d, bn); - } - mpz_init_set_ui (tr, 1); - - while (--en >= 0) - { - mp_limb_t w = e->_mp_d[en]; - mp_limb_t bit; - - bit = GMP_LIMB_HIGHBIT; - do - { - mpz_mul (tr, tr, tr); - if (w & bit) - mpz_mul (tr, tr, base); - if (tr->_mp_size > mn) - { - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - bit >>= 1; - } - while (bit > 0); - } - - /* Final reduction */ - if (tr->_mp_size >= mn) - { - minv.shift = shift; - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - //if (tp) - //gmp_free_limbs (tp, mn); - - mpz_swap (r, tr); - mpz_clear (tr); - mpz_clear (base); -} - -int -mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - if (an != bn) - return an < bn ? -1 : 1; - else - return mpn_cmp (ap, bp, an); -} - - -mp_size_t -mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - int cmp; - mp_ptr rp; - - cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn); - if (cmp > 0) - { - rp = MPZ_REALLOC (r, an); - gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); - return mpn_normalized_size (rp, an); - } - else if (cmp < 0) - { - rp = MPZ_REALLOC (r, bn); - gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); - return -mpn_normalized_size (rp, bn); - } - else - return 0; -} - -mp_limb_t -mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - i = 0; - do - { - mp_limb_t r = ap[i] + b; - /* Carry out */ - b = (r < b); - rp[i] = r; - } - while (++i < n); - - return b; -} - - -mp_limb_t -mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_add_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - -mp_size_t -mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - mp_ptr rp; - mp_limb_t cy; - - if (an < bn) - { - MPZ_SRCPTR_SWAP (a, b); - MP_SIZE_T_SWAP (an, bn); - } - - rp = MPZ_REALLOC (r, an + 1); - cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); - - rp[an] = cy; - - return an + cy; -} - -void -mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_sub (r, a, b); - else - rn = mpz_abs_add (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? rn : - rn; -} - -mp_limb_t -mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl + lpl; - cl += lpl < rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -mp_limb_t -mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - - -mp_limb_t -mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) -{ - assert (un >= vn); - assert (vn >= 1); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); - - /* We first multiply by the low order limb. This result can be - stored, not added, to rp. We also avoid a loop for zeroing this - way. */ - - rp[un] = mpn_mul_1 (rp, up, un, vp[0]); - - /* Now accumulate the product of up[] and the next higher limb from - vp[]. */ - - while (--vn >= 1) - { - rp += 1, vp += 1; - rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); - } - return rp[un]; -} - - -void -mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) -{ - int sign; - mp_size_t un, vn, rn; - mpz_t t; - mp_ptr tp; - - un = u->_mp_size; - vn = v->_mp_size; - - if (un == 0 || vn == 0) - { - r->_mp_size = 0; - return; - } - - sign = (un ^ vn) < 0; - - un = GMP_ABS (un); - vn = GMP_ABS (vn); - - mpz_init2 (t, (un + vn) * GMP_LIMB_BITS); - - tp = t->_mp_d; - if (un >= vn) - mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn); - else - mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un); - - rn = un + vn; - rn -= tp[rn-1] == 0; - - t->_mp_size = sign ? - rn : rn; - mpz_swap (r, t); - mpz_clear (t); -} - -void -mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - while (--n >= 0) - d[n] = s[n]; -} - -void -mpn_zero (mp_ptr rp, mp_size_t n) -{ - while (--n >= 0) - rp[n] = 0; -} - - -void -mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) -{ - mp_size_t un, rn; - mp_size_t limbs; - unsigned shift; - mp_ptr rp; - - un = GMP_ABS (u->_mp_size); - if (un == 0) - { - r->_mp_size = 0; - return; - } - - limbs = bits / GMP_LIMB_BITS; - shift = bits % GMP_LIMB_BITS; - - rn = un + limbs + (shift > 0); - rp = MPZ_REALLOC (r, rn); - if (shift > 0) - { - mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); - rp[rn-1] = cy; - rn -= (cy == 0); - } - else - mpn_copyd (rp + limbs, u->_mp_d, un); - - mpn_zero (rp, limbs); - - r->_mp_size = (u->_mp_size < 0) ? - rn : rn; -} - -int -mpn_zero_p(mp_srcptr rp, mp_size_t n) -{ - return mpn_normalized_size (rp, n) == 0; -} - - -void -mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode) -{ - mp_size_t un, qn; - mp_size_t limb_cnt; - mp_ptr qp; - int adjust; - - un = u->_mp_size; - if (un == 0) - { - q->_mp_size = 0; - return; - } - limb_cnt = bit_index / GMP_LIMB_BITS; - qn = GMP_ABS (un) - limb_cnt; - bit_index %= GMP_LIMB_BITS; - - if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */ - /* Note: Below, the final indexing at limb_cnt is valid because at - that point we have qn > 0. */ - adjust = (qn <= 0 - || !mpn_zero_p (u->_mp_d, limb_cnt) - || (u->_mp_d[limb_cnt] - & (((mp_limb_t) 1 << bit_index) - 1))); - else - adjust = 0; - - if (qn <= 0) - qn = 0; - else - { - qp = MPZ_REALLOC (q, qn); - - if (bit_index != 0) - { - mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index); - qn -= qp[qn - 1] == 0; - } - else - { - mpn_copyi (qp, u->_mp_d + limb_cnt, qn); - } - } - - q->_mp_size = qn; - - if (adjust) - mpz_add_ui (q, q, 1); - if (un < 0) - mpz_neg (q, q); -} - -void -mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC); -} - -int -mpz_cmp (const mpz_t a, const mpz_t b) -{ - mp_size_t asize = a->_mp_size; - mp_size_t bsize = b->_mp_size; - - if (asize != bsize) - return (asize < bsize) ? -1 : 1; - else if (asize >= 0) - return mpn_cmp (a->_mp_d, b->_mp_d, asize); - else - return mpn_cmp (b->_mp_d, a->_mp_d, -asize); -} - -void -mpz_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_add (r, a, b); - else - rn = mpz_abs_sub (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? rn : - rn; -} - - -int -mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t limb_index; - unsigned shift; - mp_size_t ds; - mp_size_t dn; - mp_limb_t w; - int bit; - - ds = d->_mp_size; - dn = GMP_ABS (ds); - limb_index = bit_index / GMP_LIMB_BITS; - if (limb_index >= dn) - return ds < 0; - - shift = bit_index % GMP_LIMB_BITS; - w = d->_mp_d[limb_index]; - bit = (w >> shift) & 1; - - if (ds < 0) - { - /* d < 0. Check if any of the bits below is set: If so, our bit - must be complemented. */ - if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) - return bit ^ 1; - while (--limb_index >= 0) - if (d->_mp_d[limb_index] > 0) - return bit ^ 1; - } - return bit; -} - -mp_bitcnt_t -mpn_limb_size_in_base_2 (mp_limb_t u) -{ - unsigned shift; - - assert (u > 0); - gmp_clz (shift, u); - return GMP_LIMB_BITS - shift; -} - -size_t -mpz_sizeinbase (const mpz_t u, int base) -{ - mp_size_t un, tn; - mp_srcptr up; - //mp_ptr tp; - mpz_t tp; - - mp_bitcnt_t bits; - struct gmp_div_inverse bi; - size_t ndigits; - - mpz_init(tp); - - assert (base >= 2); - assert (base <= 62); - - un = GMP_ABS (u->_mp_size); - if (un == 0) - return 1; - - up = u->_mp_d; - - bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]); - switch (base) - { - case 2: - return bits; - case 4: - return (bits + 1) / 2; - case 8: - return (bits + 2) / 3; - case 16: - return (bits + 3) / 4; - case 32: - return (bits + 4) / 5; - /* FIXME: Do something more clever for the common case of base - 10. */ - } - - //tp = gmp_alloc_limbs (un); - - mpn_copyi (tp->_mp_d, up, un); - mpn_div_qr_1_invert (&bi, base); - - tn = un; - ndigits = 0; - do - { - ndigits++; - mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi); - tn -= (tp->_mp_d[tn-1] == 0); - } - while (tn > 0); - - //gmp_free_limbs (tp, un); - return ndigits; -} - -int -mpz_sgn (const mpz_t u) -{ - return GMP_CMP (u->_mp_size, 0); -} - -mp_bitcnt_t -mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux) -{ - unsigned cnt; - - assert (ux == 0 || ux == GMP_LIMB_MAX); - assert (0 <= i && i <= un ); - - while (limb == 0) - { - i++; - if (i == un) - return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); - limb = ux ^ up[i]; - } - gmp_ctz (cnt, limb); - return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; -} - -void -mpz_abs (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = GMP_ABS (r->_mp_size); -} - - -mp_bitcnt_t -mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) -{ - mp_size_t i; - i = bit / GMP_LIMB_BITS; - - return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), - i, ptr, i, 0); -} - -mp_bitcnt_t -mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit) -{ - mp_ptr up; - mp_size_t us, un, i; - mp_limb_t limb, ux; - - us = u->_mp_size; - un = GMP_ABS (us); - i = starting_bit / GMP_LIMB_BITS; - - /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit - for u<0. Notice this test picks up any u==0 too. */ - if (i >= un) - return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit); - - up = u->_mp_d; - ux = 0; - limb = up[i]; - - if (starting_bit != 0) - { - if (us < 0) - { - ux = mpn_zero_p (up, i); - limb = ~ limb + ux; - ux = - (mp_limb_t) (limb >= ux); - } - - /* Mask to 0 all bits before starting_bit, thus ignoring them. */ - limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - } - - return mpn_common_scan (limb, i, up, un, ux); -} - - -mp_bitcnt_t -mpz_make_odd (mpz_t r) -{ - mp_bitcnt_t shift; - - assert (r->_mp_size > 0); - /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ - shift = mpn_scan1 (r->_mp_d, 0); - mpz_tdiv_q_2exp (r, r, shift); - - return shift; -} - -void -mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC); -} - -void -mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_limb_t bit; - mp_ptr dp; - - dn = GMP_ABS (d->_mp_size); - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - if (limb_index >= dn) - { - mp_size_t i; - /* The bit should be set outside of the end of the number. - We have to increase the size of the number. */ - dp = MPZ_REALLOC (d, limb_index + 1); - - dp[limb_index] = bit; - for (i = dn; i < limb_index; i++) - dp[i] = 0; - dn = limb_index + 1; - } - else - { - mp_limb_t cy; - - dp = d->_mp_d; - - cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); - if (cy > 0) - { - dp = MPZ_REALLOC (d, dn + 1); - dp[dn++] = cy; - } - } - - d->_mp_size = (d->_mp_size < 0) ? - dn : dn; -} - -void -mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_ptr dp; - mp_limb_t bit; - - dn = GMP_ABS (d->_mp_size); - dp = d->_mp_d; - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - assert (limb_index < dn); - - gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, - dn - limb_index, bit)); - dn = mpn_normalized_size (dp, dn); - d->_mp_size = (d->_mp_size < 0) ? - dn : dn; -} - -void -mpz_setbit (mpz_t d, mp_bitcnt_t bit_index) -{ - if (!mpz_tstbit (d, bit_index)) - { - if (d->_mp_size >= 0) - mpz_abs_add_bit (d, bit_index); - else - mpz_abs_sub_bit (d, bit_index); - } -} - -void -mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d) -{ - gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC)); -} - -#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0]) -#define mpz_even_p(z) (! mpz_odd_p (z)) - -int -mpz_cmpabs (const mpz_t u, const mpz_t v) -{ - return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size), - v->_mp_d, GMP_ABS (v->_mp_size)); -} - -void -mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) -{ - mpz_t tu, tv, s0, s1, t0, t1; - mp_bitcnt_t uz, vz, gz; - mp_bitcnt_t power; - - if (u->_mp_size == 0) - { - /* g = 0 u + sgn(v) v */ - signed long sign = mpz_sgn (v); - mpz_abs (g, v); - if (s) - s->_mp_size = 0; - if (t) - mpz_set_si (t, sign); - return; - } - - if (v->_mp_size == 0) - { - /* g = sgn(u) u + 0 v */ - signed long sign = mpz_sgn (u); - mpz_abs (g, u); - if (s) - mpz_set_si (s, sign); - if (t) - t->_mp_size = 0; - return; - } - - mpz_init (tu); - mpz_init (tv); - mpz_init (s0); - mpz_init (s1); - mpz_init (t0); - mpz_init (t1); - - mpz_abs (tu, u); - uz = mpz_make_odd (tu); - mpz_abs (tv, v); - vz = mpz_make_odd (tv); - gz = GMP_MIN (uz, vz); - - uz -= gz; - vz -= gz; - - /* Cofactors corresponding to odd gcd. gz handled later. */ - if (tu->_mp_size < tv->_mp_size) - { - mpz_swap (tu, tv); - MPZ_SRCPTR_SWAP (u, v); - MPZ_PTR_SWAP (s, t); - MP_BITCNT_T_SWAP (uz, vz); - } - - /* Maintain - * - * u = t0 tu + t1 tv - * v = s0 tu + s1 tv - * - * where u and v denote the inputs with common factors of two - * eliminated, and det (s0, t0; s1, t1) = 2^p. Then - * - * 2^p tu = s1 u - t1 v - * 2^p tv = -s0 u + t0 v - */ - - /* After initial division, tu = q tv + tu', we have - * - * u = 2^uz (tu' + q tv) - * v = 2^vz tv - * - * or - * - * t0 = 2^uz, t1 = 2^uz q - * s0 = 0, s1 = 2^vz - */ - - mpz_tdiv_qr (t1, tu, tu, tv); - mpz_mul_2exp (t1, t1, uz); - - mpz_setbit (s1, vz); - power = uz + vz; - - if (tu->_mp_size > 0) - { - mp_bitcnt_t shift; - shift = mpz_make_odd (tu); - mpz_setbit (t0, uz + shift); - power += shift; - - for (;;) - { - int c; - c = mpz_cmp (tu, tv); - if (c == 0) - break; - - if (c < 0) - { - /* tv = tv' + tu - * - * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' - * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ - - mpz_sub (tv, tv, tu); - mpz_add (t0, t0, t1); - mpz_add (s0, s0, s1); - - shift = mpz_make_odd (tv); - mpz_mul_2exp (t1, t1, shift); - mpz_mul_2exp (s1, s1, shift); - } - else - { - mpz_sub (tu, tu, tv); - mpz_add (t1, t0, t1); - mpz_add (s1, s0, s1); - - shift = mpz_make_odd (tu); - mpz_mul_2exp (t0, t0, shift); - mpz_mul_2exp (s0, s0, shift); - } - power += shift; - } - } - else - mpz_setbit (t0, uz); - - /* Now tv = odd part of gcd, and -s0 and t0 are corresponding - cofactors. */ - - mpz_mul_2exp (tv, tv, gz); - mpz_neg (s0, s0); - - /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To - adjust cofactors, we need u / g and v / g */ - - mpz_divexact (s1, v, tv); - mpz_abs (s1, s1); - mpz_divexact (t1, u, tv); - mpz_abs (t1, t1); - - while (power-- > 0) - { - /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ - if (mpz_odd_p (s0) || mpz_odd_p (t0)) - { - mpz_sub (s0, s0, s1); - mpz_add (t0, t0, t1); - } - //assert (mpz_even_p (t0) && mpz_even_p (s0)); - mpz_tdiv_q_2exp (s0, s0, 1); - mpz_tdiv_q_2exp (t0, t0, 1); - } - - /* Arrange so that |s| < |u| / 2g */ - mpz_add (s1, s0, s1); - if (mpz_cmpabs (s0, s1) > 0) - { - mpz_swap (s0, s1); - mpz_sub (t0, t0, t1); - } - if (u->_mp_size < 0) - mpz_neg (s0, s0); - if (v->_mp_size < 0) - mpz_neg (t0, t0); - - mpz_swap (g, tv); - if (s) - mpz_swap (s, s0); - if (t) - mpz_swap (t, t0); - - mpz_clear (tu); - mpz_clear (tv); - mpz_clear (s0); - mpz_clear (s1); - mpz_clear (t0); - mpz_clear (t1); -} - - -void -mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) -{ - mpz_t t; - mpz_init_set_ui (t, v); - mpz_mul (t, u, t); - mpz_add (r, r, t); - mpz_clear (t); -} - - -// STRING CONVERSION - -unsigned -mpn_base_power_of_two_p (unsigned b) -{ - switch (b) - { - case 2: return 1; - case 4: return 2; - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - case 128: return 7; - case 256: return 8; - default: return 0; - } -} - - - -void -mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) -{ - mp_limb_t m; - mp_limb_t p; - unsigned exp; - - m = GMP_LIMB_MAX / b; - for (exp = 1, p = b; p <= m; exp++) - p *= b; - - info->exp = exp; - info->bb = p; -} - -int isspace_gpu(unsigned char c) { - if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v') - return 1; - return 0; -} - -int strlen_c(const char *c) { - - // rather naive implementation – we assume a string is terminated, and is not 0 characters long. - - int i = 0; - while (1) { - if (c[i] == '\0') - return i; - i++; - } - return i; -} - -mp_size_t -mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits) -{ - mp_size_t rn; - mp_limb_t limb; - unsigned shift; - - for (limb = 0, rn = 0, shift = 0; sn-- > 0; ) - { - limb |= (mp_limb_t) sp[sn] << shift; - shift += bits; - if (shift >= GMP_LIMB_BITS) - { - shift -= GMP_LIMB_BITS; - rp[rn++] = limb; - /* Next line is correct also if shift == 0, - bits == 8, and mp_limb_t == unsigned char. */ - limb = (unsigned int) sp[sn] >> (bits - shift); - } - } - if (limb != 0) - rp[rn++] = limb; - else - rn = mpn_normalized_size (rp, rn); - return rn; -} - -mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info) -{ - mp_size_t rn; - mp_limb_t w; - unsigned k; - size_t j; - - assert (sn > 0); - - k = 1 + (sn - 1) % info->exp; - - j = 0; - w = sp[j++]; - while (--k != 0) - w = w * b + sp[j++]; - - rp[0] = w; - - for (rn = 1; j < sn;) - { - mp_limb_t cy; - - w = sp[j++]; - for (k = 1; k < info->exp; k++) - w = w * b + sp[j++]; - - cy = mpn_mul_1 (rp, rp, rn, info->bb); - cy += mpn_add_1 (rp, rp, rn, w); - if (cy > 0) - rp[rn++] = cy; - } - assert (j == sn); - - return rn; -} - - -int -mpz_set_str (mpz_t r, const char *sp, int base) -{ - unsigned bits, value_of_a; - mp_size_t rn, alloc; - mp_ptr rp; - size_t dn, sn; - int sign; - unsigned char dp[4096]; - - assert (base == 0 || (base >= 2 && base <= 62)); - - while (isspace_gpu( (unsigned char) *sp)) - sp++; - - sign = (*sp == '-'); - sp += sign; - - if (base == 0) - { - if (sp[0] == '0') - { - if (sp[1] == 'x' || sp[1] == 'X') - { - base = 16; - sp += 2; - } - else if (sp[1] == 'b' || sp[1] == 'B') - { - base = 2; - sp += 2; - } - else - base = 8; - } - else - base = 10; - } - - if (!*sp) - { - r->_mp_size = 0; - return -1; - } - sn = strlen_c(sp); - //dp = (unsigned char *) gmp_alloc (sn); - - - value_of_a = (base > 36) ? 36 : 10; - for (dn = 0; *sp; sp++) - { - unsigned digit; - - if (isspace_gpu ((unsigned char) *sp)) - continue; - else if (*sp >= '0' && *sp <= '9') - digit = *sp - '0'; - else if (*sp >= 'a' && *sp <= 'z') - digit = *sp - 'a' + value_of_a; - else if (*sp >= 'A' && *sp <= 'Z') - digit = *sp - 'A' + 10; - else - digit = base; /* fail */ - - if (digit >= (unsigned) base) - { - //gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - - dp[dn++] = digit; - } - - if (!dn) - { - //gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - bits = mpn_base_power_of_two_p (base); - - if (bits > 0) - { - alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_bits (rp, dp, dn, bits); - } - else - { - struct mpn_base_info info; - mpn_get_base_info (&info, base); - alloc = (dn + info.exp - 1) / info.exp; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_other (rp, dp, dn, base, &info); - /* Normalization, needed for all-zero input. */ - assert (rn > 0); - rn -= rp[rn-1] == 0; - } - assert (rn <= alloc); - //gmp_free (dp, sn); - - r->_mp_size = sign ? - rn : rn; - - return 0; -} - - - -int -mpz_init_set_str (mpz_t r, const char *sp, int base) -{ - mpz_init (r); - return mpz_set_str (r, sp, base); -} - - -// Montgomery multiplication - -void mont_prepare(mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x - ); - -void mont_product(mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ); - -void mont_modexp(mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ); - -void mont_finish(mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ); - -void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj); - -void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); - - - - -void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) { - - mpz_t two; // powj == 2^j - - mpz_init_set_ui(two, 2); - - mp_bitcnt_t j = mpz_scan1(m, 0); - - mpz_tdiv_q_2exp(q,m,j); - mpz_mul_2exp(powj,two,j - 1); - - mpz_clear(two); - -} - -// CPU -void mont_prepare(mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x) { - - // MARK: break this up, reduce the amount of temporary variables - - // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) - - // calculate r, which must be larger than the modulo and also a power of 2 - - mpz_t one, oo; // some helper variables - mpz_init_set_si(one,1); - mpz_init_set_si(oo,0); - - size_t len = mpz_sizeinbase(m,2); - - mpz_mul_2exp(r,one,len); - - mpz_set_si(one, 0); - - - mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni - - int sgn = mpz_sgn(r_1); - - mpz_abs(r_1, r_1); - mpz_abs(ni, ni); - - if (sgn == -1) { - mpz_sub(ni, r, ni); - mpz_sub(r_1, m, r_1); - } - - if (mpz_cmp_ui(one, 1)) - assert(0); - - mpz_mul(one, r, r_1); - mpz_mul(oo,ni,m); - - mpz_sub(one, one, oo); // oo must be one - - if (mpz_cmp_ui(one, 1)) - assert(0); - - mpz_mul(M, b, r); - mpz_mod(M, M, m); // set M - - mpz_mod(x, r, m); // set x - - mpz_clear(one); - mpz_clear(oo); - -} - -// maybe GPU? -// MARK: n MUST be an odd number -void mont_modexp(mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) { - - mpz_t aa,xx; - - mpz_init_set(aa, M); - mpz_init_set(xx, a); - - int k = (int)mpz_sizeinbase(e,2); - - for (int i = k - 1; i >= 0; i--) { - - mont_product(xx, xx, xx, r, r_1, n, ni); - - if (mpz_tstbit(e, i)) - mont_product(xx, aa, xx, r, r_1, n, ni); - - } - - mpz_set(ret, xx); - - mpz_clear(aa); - mpz_clear(xx); - -} - -void mont_finish(mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) { - - - mpz_t x,one; - - mpz_init(x); - mpz_init_set_ui(one, 1); - - mont_product(x, xx, one, r, r_1, n, ni); - - mpz_set(ret, x); - - mpz_clear(x); - mpz_clear(one); - -} - - -// GPU -void mont_product(mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ) { - - mpz_t t,m,u; - - mpz_init(t); - mpz_init(m); - mpz_init(u); - - - - mont_mulmod(t, b, a, r); - - mont_mulmod(m, ni, t, r); - - mpz_t ab,mn; - - mpz_init(ab); - mpz_init(mn); - - mpz_mul(ab, a, b); - mpz_mul(mn, m, n); - - mpz_add(ab, ab, mn); - - unsigned long sz = mpz_sizeinbase(r,2) - 1; - mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division - - if (mpz_cmp(u, n) >= 0) - mpz_sub(u, u, n); - - mpz_set(ret, u); - - mpz_clear(ab); - mpz_clear(mn); - mpz_clear(t); - mpz_clear(m); - mpz_clear(u); - -} - -// not the fastest... but it does not increase the variable sizes -void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) { - - mpz_t aa, bb; - mpz_init_set(aa, a); - mpz_init_set(bb,b); - - mpz_mod(aa, aa, mod); // in case a is bigger - - while (mpz_cmp_ui(bb, 0) > 0) { - if (mpz_odd_p(bb)) { - mpz_add(res, res, aa); - mpz_mod(res, res, mod); - } - - mpz_mul_2exp(aa,aa,1); - mpz_mod(aa, aa, mod); - mpz_tdiv_q_2exp(bb, bb, 1); - } -} - - - -void montgomery(const char *signature, - const char *exponent, - const char *modulus, - const char *base, - unsigned long *valid) -{ - - - int radix = 16; - - mpz_t b,e,m,res; - - - - mpz_init(res); - - mpz_init_set_str(b,base,radix); // M - mpz_init_set_str(e,exponent,radix); - mpz_init_set_str(m,modulus,radix); // n - - mpz_t r, r_1, ni, M, x; - mpz_init(r); // MARK: I think I have to destroy these myself - mpz_init(r_1); - mpz_init(ni); - mpz_init(M); - mpz_init(x); - - - mpz_t xx; - mpz_init(xx); - - - - - if (mpz_even_p(m)) { - - mpz_t bb, x1, x2, q, powj; - mpz_init(bb); - mpz_init(x1); - mpz_init(x2); - mpz_init(q); - mpz_init(powj); - - mont_prepare_even_modulus(m, q, powj); - - // q is uneven, so we can use regular modexp - // MARK: we can improve the efficiency here by doing simple reductions - - mpz_mod(bb, b, q); // reductions like this - - mont_prepare(bb, e, q, r, r_1, ni, M, x); - mont_modexp(xx, x, e, M, q, ni, r, r_1); - mont_finish(x1, xx, q, ni, r, r_1); - - - // MARK: we can also reduce and really speed this up as well -> binary method? - mpz_powm(x2, b, e, powj); - - mpz_t y, q_1; - mpz_init(y); - mpz_init(q_1); - - mpz_sub(y, x2, x1); - - mpz_invert(q_1, q, powj); - - mpz_mul(y, y, q_1); - mpz_mod(y, y, powj); - - mpz_addmul(x1, q, y); - - mpz_set(res, x1); - - - - } else { - - mont_prepare(b, e, m, r, r_1, ni, M, x); - - mont_modexp(xx, x, e, M, m, ni, r, r_1); - - mont_finish(res, xx, m, ni, r, r_1); - - - } - - - - - - - - mpz_t sig; - mpz_init_set_str(sig,signature,radix); - - if (mpz_cmp(sig,res) == 0) { - - *valid = 1; - - } else { - - } - - -} diff --git a/source/gmp_GPU.h b/source/gmp_GPU.h @@ -1,27 +0,0 @@ -// -// gmp_GPU.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 25.11.2023. -// - -#ifndef gmp_GPU_h -#define gmp_GPU_h - -//#include <stdio.h> - -//#include <assert.h> -#include <ctype.h> -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - - -void montgomery(const char *signature, - const char *exponent, - const char *modulus, - const char *base, - unsigned long *valid); - -#endif /* gmp_GPU_h */ diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c @@ -7,23 +7,75 @@ #include "montgomery.h" +#include "time.h" + int main(int argc, char** argv) { + mont_rsa_tests(); + mpz_t res; mpz_init(res); //mont_go(res,"13", "0F", "C7",16); - //mont_go(res, "5BD6158BDE0AC0655B6FCEA57994011D18B6B3C9E5FF75C45FC1E5EC2C1F26D6AB8547A17C0BC15D40F4346CFE74CF4EB417E6850D45C3B49E9389DAF400BC5E5B3F5D8E1E45A23DD042A87E82703209F9EA9808A002FEC00C96A5F0D9B7673B4B0A224438D81C0A9CEAD0DD22802B409230072768E73688D63EAB1C9BC242FAEDCFE0C8478B38254BAAC07AD6F82A27A0C3893FCB604BB57158F9125027AECC91D55B364B5C2BB9FE07FB6AB69F5A65112A2B7D5A805CA9B2C1CB75D315DE345BA68100DD5E46FA3BA54B614C298E60EBAF95CEC738DA2513736ECE051D153CECAC29F4A432A5FEB287E2A1B8C4640C58FF9E9E7DB6889E4865D1F1C8CF4E47", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16); + struct timespec t1, t2; + + clock_gettime(CLOCK_REALTIME, &t1); + + mont_go(res, "5BD6158BDE0AC0655B6FCEA57994011D18B6B3C9E5FF75C45FC1E5EC2C1F26D6AB8547A17C0BC15D40F4346CFE74CF4EB417E6850D45C3B49E9389DAF400BC5E5B3F5D8E1E45A23DD042A87E82703209F9EA9808A002FEC00C96A5F0D9B7673B4B0A224438D81C0A9CEAD0DD22802B409230072768E73688D63EAB1C9BC242FAEDCFE0C8478B38254BAAC07AD6F82A27A0C3893FCB604BB57158F9125027AECC91D55B364B5C2BB9FE07FB6AB69F5A65112A2B7D5A805CA9B2C1CB75D315DE345BA68100DD5E46FA3BA54B614C298E60EBAF95CEC738DA2513736ECE051D153CECAC29F4A432A5FEB287E2A1B8C4640C58FF9E9E7DB6889E4865D1F1C8CF4E47", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16); //mont_go(res, "13", "05", "31",10); - //char str[2048]; - //mpz_get_str(str, 16, res); // result is base 10! + char str[2048]; + mpz_get_str(str, 16, res); // result is base 10! + + printf("%s\n",str); + + clock_gettime(CLOCK_REALTIME, &t2); + + + printf("\nCPU verification (with montgomery) took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000); + + + + + char *template = "(genkey(rsa(nbits 4:2048)))"; + gcry_sexp_t parms; + + gcry_sexp_new(&parms, template, strlen(template), 1); + + + gcry_sexp_t key; + + gcry_pk_genkey(&key,parms); + + char *val = "1234567890ABCDEF"; // MARK: try random values as well + gcry_mpi_t m_mpi = gcry_mpi_new((int)strlen(val) * 8); + size_t scanned = 0; + + gcry_mpi_scan(&m_mpi, GCRYMPI_FMT_HEX, val, 0, &scanned); + + gcry_sexp_t toSign; + size_t errOff = 0; + char *dataformat = "(data (flags raw) (value %m))"; + + gcry_sexp_build(&toSign,&errOff,dataformat,m_mpi); + + gcry_sexp_t resSign; + + gcry_pk_sign(&resSign, toSign, key); + + clock_gettime(CLOCK_REALTIME, &t1); + + int gkvy = gcry_pk_verify(resSign, toSign, key); - //printf("%s\n",str); + clock_gettime(CLOCK_REALTIME, &t2); + + if (gkvy == 0) { + printf("\ngcry verification took %ld micro seconds\n", (t2.tv_nsec - t1.tv_nsec) / 1000); + } //mont_go(res, "00956E3E7B09F7FECEF26CA44FFD69F19DC8DB6C3A29A707C2CDAD56994A58D6ACB8B275678D0D8670D3C716AC5C98398C8067943C7292F787F5451E8202F4C8BAEFA6CA787BC79B73A99CC4C85743EC7320E17195D560A380356A9D32AA81EF276A9DE8B9F6728647851AAD0090A458FB928BCE86884BD7CC7AC3CF226CE546E596135A948B820E1865D6A3395DF2BD5EB26FE5259B2B950CC61F887C0D5A81F77549D8F792D32552870358EC5B2B45552C35829D732CC1A08898FD2FFDFF5EBFE0BEE7D5702FCA240B377BFE7D2821E123F2A146725D01A5CF0A6C89FB7E73CA6F3B8640C44B0FA1A51B429BB3D4668495F20A25FB4185831C3B479C5041713C", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16); @@ -39,7 +91,7 @@ int main(int argc, char** argv) // montgomery_test(); - mont_rsa_tests(); + diff --git a/source/rsa-test.c b/source/rsa-test.c @@ -21,7 +21,7 @@ //#include "gmp.h" - #include "gmp_GPU.h" + //#include "gmp_GPU.h" // //#include "RSA-Montgomery.h" @@ -641,12 +641,6 @@ int rsa_tests(void) { printf("--"); -/* - generate_random_pairs(q, u, - r, v, - s, w, - t, x, gen_n_pairs); -*/ unsigned long result = 0; @@ -680,10 +674,10 @@ int rsa_tests(void) { } // returns how many public keys were read – either 1 or n -int mont_pairs_from_files(char *bases, - char *exponents, - char *moduli, - char *signatures, +int mont_pairs_from_files(char *bases, unsigned long *b_off, + char *exponents, unsigned long *e_off, + char *moduli, unsigned long *m_off, + char *signatures, unsigned long *s_off, unsigned int *n) { FILE *pkfile; @@ -700,23 +694,37 @@ int mont_pairs_from_files(char *bases, int i = 0; + unsigned long b_offset = 0; + unsigned long e_offset = 0; + unsigned long m_offset = 0; + unsigned long s_offset = 0; + while (1) { - char n_buf[2048]; + char n_buf[2048]; // need to be 0 char e_buf[2048]; + memset(n_buf, 0, 2048); + memset(e_buf, 0, 2048); + if (fscanf(pkfile, "%s %s ", n_buf,e_buf) == -1) break; unsigned long n_buf_len = strlen(n_buf); unsigned long e_buf_len = strlen(e_buf); - memcpy(moduli, n_buf, n_buf_len); - memcpy(exponents, e_buf, e_buf_len); + memcpy(&moduli[m_offset], n_buf, n_buf_len); + memcpy(&exponents[e_offset], e_buf, e_buf_len); + + m_off[i] = m_offset; + e_off[i] = e_offset; + + m_offset += n_buf_len + 1; + e_offset += e_buf_len + 1; i++; - break; // testing with just one + // break; // testing with just one } int j = 0; @@ -726,18 +734,27 @@ int mont_pairs_from_files(char *bases, char m_buf[2048]; // temp storage, large enough char s_buf[2048]; + memset(m_buf, 0, 2048); + memset(s_buf, 0, 2048); + if (fscanf(msfile, "%s %s ", m_buf,s_buf) == -1) break; unsigned long m_buf_len = strlen(m_buf); unsigned long s_buf_len = strlen(s_buf); - memcpy(bases, m_buf, m_buf_len); - memcpy(signatures, s_buf, s_buf_len); + memcpy(&bases[b_offset], m_buf, m_buf_len); + memcpy(&signatures[s_offset], s_buf, s_buf_len); + + b_off[j] = b_offset; + s_off[j] = s_offset; + + b_offset += m_buf_len + 1; + s_offset += s_buf_len + 1; j++; - break; // testing with just one + // break; // testing with just one } @@ -751,10 +768,10 @@ int mont_pairs_from_files(char *bases, } -int mont_verify_pairs_with_opencl(char *bases, - char *exponents, - char *moduli, - char *signatures, +int mont_verify_pairs_with_opencl(char *bases, unsigned long *b_off, + char *exponents, unsigned long *e_off, + char *moduli, unsigned long *m_off, + char *signatures, unsigned long *s_off, const unsigned int n, const unsigned int pks, unsigned long *result) { @@ -895,21 +912,13 @@ int mont_verify_pairs_with_opencl(char *bases, if (err != CL_SUCCESS) { size_t len; - char buffer[352323]; + char buffer[4096]; printf("Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); exit(1); } - - size_t len; - char buffer[3523]; - - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); - printf("%s\n", buffer); - - // Create the compute kernel in the program we wish to run // @@ -927,43 +936,32 @@ int mont_verify_pairs_with_opencl(char *bases, cl_mem mod_mem; cl_mem comp_mem; -// cl_mem sig_len; -// cl_mem exp_len; -// cl_mem mod_len; -// cl_mem comp_len; + cl_mem sig_len; + cl_mem exp_len; + cl_mem mod_len; + cl_mem comp_len; cl_mem valid; // needs to be a buffer because it goes out unsigned long signature_is_valid = 0; - size_t moduli_len = strlen(moduli) + 1; - size_t exponents_len = strlen(exponents) + 1; - size_t signatures_len = strlen(signatures) + 1; - size_t bases_len = strlen(bases) + 1; - - - + unsigned long moduli_len = m_off[pks-1] + strlen(&moduli[m_off[pks-1]]) + 1; + unsigned long exponents_len = e_off[pks-1] + strlen(&exponents[e_off[pks-1]]) + 1; + unsigned long signatures_len = s_off[n-1] + strlen(&signatures[s_off[n-1]]) + 1; + unsigned long bases_len = b_off[n-1] + strlen(&bases[b_off[n-1]]) + 1; - if (pks == 1) { - mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, moduli_len, NULL, NULL); - exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, exponents_len, NULL, NULL); - } else { - - } + mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, moduli_len, NULL, NULL); + exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, exponents_len, NULL, NULL); sig_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, signatures_len , NULL, NULL); comp_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, bases_len, NULL, NULL); // the base, to compare whether we get the same signature -// -// if (pks == 1) { -// mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long), NULL, NULL); -// exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long), NULL, NULL); -// } else { -// -// } -// -// sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); -// comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); + + mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * pks, NULL, NULL); + exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * pks, NULL, NULL); + + sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); + comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); valid = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); @@ -978,19 +976,16 @@ int mont_verify_pairs_with_opencl(char *bases, // Write our data set into the input array in device memory // err = clEnqueueWriteBuffer(commands, sig_mem, CL_TRUE, 0, signatures_len, signatures, 0, NULL, NULL); -// err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_off, 0, NULL, NULL); - if (pks == 1) { - err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, exponents_len, exponents, 0, NULL, NULL); -// err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0,sizeof(unsigned long), e_len, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, moduli_len, moduli, 0, NULL, NULL); -// err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0,sizeof(unsigned long), m_len, 0, NULL, NULL); - } else { - - } + err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, exponents_len, exponents, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0, sizeof(unsigned long) * pks, e_off, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, moduli_len, moduli, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0, sizeof(unsigned long) * pks, m_off, 0, NULL, NULL); err |= clEnqueueWriteBuffer(commands, comp_mem, CL_TRUE, 0, bases_len, bases, 0, NULL, NULL); -// err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_off, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL); if (err != CL_SUCCESS) { @@ -1002,21 +997,21 @@ int mont_verify_pairs_with_opencl(char *bases, // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &sig_mem); -// err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len); - err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &exp_mem); -// err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len); - err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &mod_mem); -// err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len); - err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &comp_mem); -// err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len); - err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &valid); - //err |= clSetKernelArg(kernel, 5, sizeof(unsigned int), &n); - //err |= clSetKernelArg(kernel, 6, sizeof(unsigned int), &pks); + err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len); + err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &exp_mem); + err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len); + err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &mod_mem); + err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len); + err |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &comp_mem); + err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len); + err |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &valid); + err |= clSetKernelArg(kernel, 9, sizeof(unsigned int), &n); + err |= clSetKernelArg(kernel, 10, sizeof(unsigned int), &pks); //err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); if (err != CL_SUCCESS) { - printf("RSA-Error: Failed to set kernel arguments! %d\n", err); + printf("Error: Failed to set kernel arguments! %d\n", err); exit(1); } @@ -1042,6 +1037,11 @@ int mont_verify_pairs_with_opencl(char *bases, printf("KERNEL IS EXECUTING...\n"); + struct timespec t1, t2; + + clock_gettime(CLOCK_REALTIME, &t1); + + // Wait for the command commands to get serviced before reading back results // clFinish(commands); @@ -1055,6 +1055,12 @@ int mont_verify_pairs_with_opencl(char *bases, printf("Error: Failed to read output array! %d\n", err); exit(1); } + + clock_gettime(CLOCK_REALTIME, &t2); + + + + printf("\nGPU verification took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000); *result = signature_is_valid; @@ -1065,10 +1071,10 @@ int mont_verify_pairs_with_opencl(char *bases, clReleaseMemObject(mod_mem); clReleaseMemObject(sig_mem); -// clReleaseMemObject(comp_len); -// clReleaseMemObject(exp_len); -// clReleaseMemObject(mod_len); -// clReleaseMemObject(sig_len); + clReleaseMemObject(comp_len); + clReleaseMemObject(exp_len); + clReleaseMemObject(mod_len); + clReleaseMemObject(sig_len); clReleaseProgram(program); clReleaseKernel(kernel); @@ -1084,47 +1090,37 @@ int mont_rsa_tests(void) { setup_gcry(); - unsigned int pairs = 1; - + unsigned int pairs = number_of_pairs();; - int str_sz = (2048); + int str_sz = (2048) * pairs; - char *b = malloc(str_sz); char *e = malloc(str_sz); char *m = malloc(str_sz); char *s = malloc(str_sz); - unsigned int pks = pairs; + unsigned long *b_off = malloc(str_sz); + unsigned long *e_off = malloc(str_sz); + unsigned long *m_off = malloc(str_sz); + unsigned long *s_off = malloc(str_sz); - pks = mont_pairs_from_files(b, e, m, s, &pairs); + unsigned int pks = mont_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, + &pairs); unsigned long result = 0; - struct timespec t1, t2; - - clock_gettime(CLOCK_REALTIME, &t1); - - - - - //montgomery(b, e, m, s, &result); - + - mont_verify_pairs_with_opencl(s,e,m,b, + mont_verify_pairs_with_opencl(s, s_off, e, e_off, m, m_off, b, b_off, pairs, pks, &result); - clock_gettime(CLOCK_REALTIME, &t2); - - printf("VERIFICATION RESULT: %lu\n",result); - - printf("\nGPU verification took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000); + printf("VERIFICATION RESULT: %lu\n\n",result); free(b); @@ -1135,19 +1131,3 @@ int mont_rsa_tests(void) { } - -/* -static void show_sexp(const char *prefix, gcry_sexp_t a) { - char *buf; - size_t size; - - if (prefix) - fputs(prefix, stderr); - size = gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, NULL, 0); - buf = gcry_xmalloc(size); - - gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, buf, size); - fprintf(stderr, "%.*s", (int) size, buf); - gcry_free(buf); - } -*/ diff --git a/xcode/.DS_Store b/xcode/.DS_Store Binary files differ. diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj @@ -9,7 +9,8 @@ /* Begin PBXBuildFile section */ 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A36F8882B0F938E00AB772D /* montgomery.cl */; }; 6A8A795F2A89672700116D7D /* verify.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* verify.cl */; }; - 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */; }; + 6A99B06E2B1293DA0004E4B7 /* gmp.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A7914CB2B0CF320001EDCC1 /* gmp.c */; }; + 6A99B06F2B1297220004E4B7 /* montgomery.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A7914CD2B0CF320001EDCC1 /* montgomery.c */; }; 6AA38E5B2B0A97FC00E85243 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AA38E5A2B0A97FC00E85243 /* main.c */; }; 6AD85E072AF71AD900662919 /* big-int-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487D2ADADF4500D58E08 /* big-int-test.c */; }; 6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AD85E0B2AFA510C00662919 /* openssl-test.c */; }; @@ -48,8 +49,6 @@ 6A7914CD2B0CF320001EDCC1 /* montgomery.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = montgomery.c; path = ../source/montgomery.c; sourceTree = "<group>"; }; 6A7914CE2B0CF320001EDCC1 /* gmp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp.h; path = ../source/gmp.h; sourceTree = "<group>"; }; 6A8A795E2A89672700116D7D /* verify.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = verify.cl; sourceTree = "<group>"; }; - 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp_GPU.h; path = ../source/gmp_GPU.h; sourceTree = "<group>"; }; - 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gmp_GPU.c; path = ../source/gmp_GPU.c; sourceTree = "<group>"; }; 6AA38E582B0A97FC00E85243 /* lib-gpu-generate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-generate"; sourceTree = BUILT_PRODUCTS_DIR; }; 6AA38E5A2B0A97FC00E85243 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; }; 6AA38E612B0A9B2100E85243 /* lib-gpu-generate.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "lib-gpu-generate.entitlements"; sourceTree = "<group>"; }; @@ -138,8 +137,6 @@ 6AF748852ADADFAD00D58E08 /* opencl-test.c */, 6A7914CB2B0CF320001EDCC1 /* gmp.c */, 6A7914CD2B0CF320001EDCC1 /* montgomery.c */, - 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */, - 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */, ); name = Sources; sourceTree = "<group>"; @@ -230,8 +227,9 @@ buildActionMask = 2147483647; files = ( 6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */, - 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */, 6AD85E072AF71AD900662919 /* big-int-test.c in Sources */, + 6A99B06F2B1297220004E4B7 /* montgomery.c in Sources */, + 6A99B06E2B1293DA0004E4B7 /* gmp.c in Sources */, 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */, 6A8A795F2A89672700116D7D /* verify.cl in Sources */, 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */, diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate Binary files differ. diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -1930,8 +1930,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "657" - endingLineNumber = "657" + startingLineNumber = "651" + endingLineNumber = "651" landmarkName = "rsa_tests()" landmarkType = "9"> </BreakpointContent> @@ -2913,22 +2913,6 @@ <BreakpointProxy BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent - uuid = "A4D3CD3E-C63D-4684-A93A-EE22635022EE" - shouldBeEnabled = "No" - ignoreCount = "0" - continueAfterRunningActions = "No" - filePath = "../source/rsa-test.c" - startingColumnNumber = "9223372036854775807" - endingColumnNumber = "9223372036854775807" - startingLineNumber = "703" - endingLineNumber = "703" - landmarkName = "mont_pairs_from_files(bases, exponents, moduli, signatures, n)" - landmarkType = "9"> - </BreakpointContent> - </BreakpointProxy> - <BreakpointProxy - BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> - <BreakpointContent uuid = "4184BD65-645D-4022-94C1-79216BEF6823" shouldBeEnabled = "No" ignoreCount = "0" @@ -2936,9 +2920,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "898" - endingLineNumber = "898" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" + startingLineNumber = "915" + endingLineNumber = "915" + landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -2954,7 +2938,7 @@ endingColumnNumber = "9223372036854775807" startingLineNumber = "1052" endingLineNumber = "1052" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" + landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)" landmarkType = "9"> <Locations> <Location @@ -3047,7 +3031,7 @@ endingColumnNumber = "9223372036854775807" startingLineNumber = "1047" endingLineNumber = "1047" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" + landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)" landmarkType = "9"> <Locations> <Location @@ -3116,38 +3100,6 @@ <BreakpointProxy BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent - uuid = "8BB8F280-7EFB-48F8-A762-C497F0136785" - shouldBeEnabled = "No" - ignoreCount = "0" - continueAfterRunningActions = "No" - filePath = "../source/rsa-test.c" - startingColumnNumber = "9223372036854775807" - endingColumnNumber = "9223372036854775807" - startingLineNumber = "910" - endingLineNumber = "910" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" - landmarkType = "9"> - </BreakpointContent> - </BreakpointProxy> - <BreakpointProxy - BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> - <BreakpointContent - uuid = "F6E2F2AB-21A2-4DC2-8B9C-59FF6326A2DC" - shouldBeEnabled = "No" - ignoreCount = "0" - continueAfterRunningActions = "No" - filePath = "../source/rsa-test.c" - startingColumnNumber = "9223372036854775807" - endingColumnNumber = "9223372036854775807" - startingLineNumber = "948" - endingLineNumber = "948" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" - landmarkType = "9"> - </BreakpointContent> - </BreakpointProxy> - <BreakpointProxy - BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> - <BreakpointContent uuid = "C8B7770D-BAC1-4203-AF94-A85245DE1081" shouldBeEnabled = "No" ignoreCount = "0" @@ -3155,9 +3107,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "943" - endingLineNumber = "943" - landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" + startingLineNumber = "952" + endingLineNumber = "952" + landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)" landmarkType = "9"> <Locations> <Location @@ -3212,22 +3164,6 @@ <BreakpointProxy BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent - uuid = "2123D5D5-C635-404E-91CB-F962716A9093" - shouldBeEnabled = "No" - ignoreCount = "0" - continueAfterRunningActions = "No" - filePath = "../source/rsa-test.c" - startingColumnNumber = "9223372036854775807" - endingColumnNumber = "9223372036854775807" - startingLineNumber = "1114" - endingLineNumber = "1114" - landmarkName = "mont_rsa_tests()" - landmarkType = "9"> - </BreakpointContent> - </BreakpointProxy> - <BreakpointProxy - BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> - <BreakpointContent uuid = "8ECD43E8-C440-47F2-92EE-5C0EFBCD0487" shouldBeEnabled = "No" ignoreCount = "0" @@ -3935,5 +3871,195 @@ landmarkType = "9"> </BreakpointContent> </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "CF9E9C13-FF32-4C51-9757-FE0110A980AF" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "695" + endingLineNumber = "695" + landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "730" + endingLineNumber = "730" + landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)" + landmarkType = "9"> + <Locations> + <Location + uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851cae" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "729" + endingLineNumber = "729" + offsetFromSymbolStart = "517"> + </Location> + <Location + uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851cae" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "729" + endingLineNumber = "729" + offsetFromSymbolStart = "525"> + </Location> + <Location + uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851f4f" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "730" + endingLineNumber = "730" + offsetFromSymbolStart = "563"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "755" + endingLineNumber = "755" + landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)" + landmarkType = "9"> + <Locations> + <Location + uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "751" + endingLineNumber = "751" + offsetFromSymbolStart = "780"> + </Location> + <Location + uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "751" + endingLineNumber = "751" + offsetFromSymbolStart = "788"> + </Location> + <Location + uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_pairs_from_files" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "751" + endingLineNumber = "751" + offsetFromSymbolStart = "796"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "096C0BB2-DC22-4EC5-8BD2-796A1B196075" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "766" + endingLineNumber = "766" + landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "4156F803-B4B2-4ACF-BBB1-7E71EC1C7D77" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1119" + endingLineNumber = "1119" + landmarkName = "mont_rsa_tests()" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "BE22DB13-E105-456F-8405-4A890569B49D" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1065" + endingLineNumber = "1065" + landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> </Breakpoints> </Bucket> diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl @@ -2836,31 +2836,40 @@ void printmpz(mpz_t n) { } -__kernel void montgomery(__constant char *signature, - __constant char *exponent, - __constant char *modulus, - __constant char *base, - __global unsigned long *valid) +__kernel void montgomery(__constant char *signature, __constant unsigned long *s_offsets, + __constant char *exponent, __constant unsigned long *e_offsets, + __constant char *modulus, __constant unsigned long *m_offsets, + __constant char *base, __constant unsigned long *b_offsets, + __global unsigned long *valid, + unsigned int n, + unsigned int pks) { int index = get_global_id(0); - if (index == 0) { + if (index < n) { int radix = 16; - mpz_t b,e,m,res; + mpz_t b,e,m,sig,res; + mpz_init(res); + + mpz_init_set_str(b,&base[b_offsets[0]],radix); // M + mpz_init_set_str(sig,&signature[s_offsets[0]],radix); + - mpz_init(res); - - mpz_init_set_str(b,base,radix); // M - mpz_init_set_str(e,exponent,radix); - mpz_init_set_str(m,modulus,radix); // n + if (pks == 1) { + mpz_init_set_str(e,exponent,radix); + mpz_init_set_str(m,modulus,radix); // n + } else { + mpz_init_set_str(e,&exponent[e_offsets[0]],radix); + mpz_init_set_str(m,&modulus[m_offsets[0]],radix); // n + } mpz_t r, r_1, ni, M, x; mpz_init(r); // MARK: I think I have to destroy these myself @@ -2874,10 +2883,6 @@ __kernel void montgomery(__constant char *signature, mpz_init(xx); - - printf((char __constant *)"%lu\n",GMP_LIMB_BITS); - - if (mpz_even_p(m)) { mpz_t bb, x1, x2, q, powj; @@ -2921,40 +2926,18 @@ __kernel void montgomery(__constant char *signature, } else { - - mont_prepare(b, e, m, r, r_1, ni, M, x); - - mont_modexp(xx, x, e, M, m, ni, r, r_1); - - // printf((char __constant *)"--\n"); - - //printmpz(xx); - mont_finish(res, xx, m, ni, r, r_1); - // printf((char __constant *)"--\n"); - - // printmpz(res); - - } - - - - - - - - mpz_t sig; - mpz_init_set_str(sig,signature,radix); - + + if (mpz_cmp(sig,res) == 0) { - *valid = 1; + *valid += 1; } else {