libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit 29caf798449ce6ff1412b3141ba9ab448ba45a43
parent 2c83ad2b16fbabc9ac90576e81324eb529a02e2c
Author: Christian Grothoff <grothoff@gnunet.org>
Date:   Wed,  6 Dec 2023 23:12:17 +0900

-a bit of DCE

Diffstat:
Msource/montgomery-test.c | 2+-
Msource/montgomery.cl | 2270++++++++++++++++++++++++++++++++-----------------------------------------------
Msource/rsa-test.c | 28++++++++++++----------------
3 files changed, 922 insertions(+), 1378 deletions(-)

diff --git a/source/montgomery-test.c b/source/montgomery-test.c @@ -435,7 +435,7 @@ int mont_rsa_tests(void) { mpz_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks, &pairs); - + pairs = 1; // FIXME: forced to 1 to make this fast printf("VERIFYING %lu SIGNATURES...\n", pairs); unsigned long result = 0; diff --git a/source/montgomery.cl b/source/montgomery.cl @@ -14,7 +14,7 @@ #define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) -#define GMP_LIMB_MAX ((mp_limb_t) ~(mp_limb_t) 0) +#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) #define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) #define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) @@ -24,7 +24,7 @@ #define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) #define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) -#define GMP_NEG_CAST(T,x) (-((T) ((x) + 1) - 1)) +#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) #define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) #define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) @@ -40,33 +40,33 @@ unsigned __clz_c = 0; \ int LOCAL_SHIFT_BITS = 8; \ if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ - for (; \ - (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ - __clz_c += 8) \ + for (; \ + (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ + __clz_c += 8) \ { __clz_x <<= LOCAL_SHIFT_BITS; } \ for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ - __clz_x <<= 1; \ + __clz_x <<= 1; \ (count) = __clz_c; \ -} while (0) + } while (0) #define gmp_umullo_limb(u, v) \ - ((sizeof(mp_limb_t) >= sizeof(int)) ? (u) * (v) : (unsigned int) (u) * (v)) + ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v)) #define gmp_umul_ppmm(w1, w0, u, v) \ do { \ int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned int __ww = (unsigned int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ + { \ + unsigned int __ww = (unsigned int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned long int __ww = (unsigned long int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ + { \ + unsigned long int __ww = (unsigned long int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ else { \ mp_limb_t __x0, __x1, __x2, __x3; \ unsigned __ul, __vl, __uh, __vh; \ @@ -86,7 +86,7 @@ __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ __x1 += __x2; /* but this indeed can */ \ if (__x1 < __x2) /* did we get it? */ \ - __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ + __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ \ (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ @@ -97,7 +97,7 @@ mp_limb_t __cy = (x); \ assert (__cy == 0); \ (void) (__cy); \ -} while (0) + } while (0) #define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ @@ -126,10 +126,10 @@ _qh += _mask; \ _r += _mask & (d); \ if (_r >= (d)) \ - { \ - _r -= (d); \ - _qh++; \ - } \ + { \ + _r -= (d); \ + _qh++; \ + } \ \ (r) = _r; \ (q) = _qh; \ @@ -149,25 +149,25 @@ (q)++; \ \ /* Conditionally adjust q and the remainders */ \ - _mask = -(mp_limb_t) ((r1) >= _q0); \ + _mask = - (mp_limb_t) ((r1) >= _q0); \ (q) += _mask; \ gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ if ((r1) >= (d1)) \ - { \ - if ((r1) > (d1) || (r0) >= (d0)) \ + { \ + if ((r1) > (d1) || (r0) >= (d0)) \ { \ (q)++; \ gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ } \ - } \ + } \ } while (0) #define gmp_ctz(count, x) do { \ mp_limb_t __ctz_x = (x); \ unsigned __ctz_c = 0; \ - gmp_clz (__ctz_c, __ctz_x & -__ctz_x); \ + gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \ (count) = GMP_LIMB_BITS - 1 - __ctz_c; \ -} while (0) + } while (0) #define MPZ_SRCPTR_SWAP(x, y) \ @@ -199,10 +199,9 @@ } while (0) -#define assert(x){if ((x)==0) {printf ((char __constant *) "assert reached\n"); \ - }} +#define assert(x){if((x)==0){printf((char __constant *)"assert reached\n");}} -#define NULL ((void*) 0) +#define NULL ((void*)0) typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t; typedef long mp_size_t; @@ -218,10 +217,10 @@ typedef struct int _mp_size; /* abs(_mp_size) is the number of limbs the last field points to. If _mp_size is negative this is a negative number. */ - // mp_limb_t *_mp_d; /* Pointer to the limbs. */ - - mp_limb_t _mp_d[256]; + //mp_limb_t *_mp_d; /* Pointer to the limbs. */ + mp_limb_t _mp_d[256]; + } __mpz_struct; typedef __mpz_struct mpz_t[1]; @@ -253,284 +252,190 @@ struct mpn_base_info enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; void mpz_init (mpz_t r); - void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n); - void mpz_set (mpz_t r, const mpz_t x); - void mpz_set (mpz_t r, const mpz_t x); - void mpz_set_ui (mpz_t r, unsigned long int x); - void mpz_set_si (mpz_t r, signed long int x); - void mpz_init_set_si (mpz_t r, signed long int x); - void mpz_init_set (mpz_t r, const mpz_t x); - void mpz_init2 (mpz_t r, mp_bitcnt_t bits); - void mpz_init_set_ui (mpz_t r, unsigned long int x); - void mpz_clear (mpz_t r); - void gmp_die (const char *msg); mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n); - void mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b); - void mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b); - void mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b); - int mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un); - unsigned long int mpz_get_ui (const mpz_t u); - int mpz_cmpabs_ui (const mpz_t u, unsigned long v); - mp_limb_t mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); - mp_limb_t mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); - mp_limb_t mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); - mp_limb_t mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0); - int mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode); + const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode); void mpz_mod (mpz_t r, const mpz_t n, const mpz_t d); - void mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d); void mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0); + mp_limb_t d1, mp_limb_t d0); void mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn); - + mp_srcptr dp, mp_size_t dn); int mpz_cmp_ui (const mpz_t u, unsigned long v); - int mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n); - mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); - mp_limb_t mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); - int mpz_invert (mpz_t r, const mpz_t u, const mpz_t m); - mp_limb_t mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); + const struct gmp_div_inverse *inv); mp_limb_t mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); - void mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); + const struct gmp_div_inverse *inv); mp_limb_t mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); - void mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv); - + mp_ptr np, mp_size_t nn, mp_limb_t n1, + mp_srcptr dp, mp_size_t dn, + mp_limb_t dinv); void mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv); + mp_srcptr dp, mp_size_t dn, + const struct gmp_div_inverse *inv); void mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m); - int mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); - mp_size_t mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b); - mp_limb_t mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); - mp_limb_t mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); - mp_size_t mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b); - void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); - mp_limb_t mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); - mp_limb_t mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); - mp_limb_t mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn); - void mpz_mul (mpz_t r, const mpz_t u, const mpz_t v); - void mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n); - void mpn_zero (mp_ptr rp, mp_size_t n); - void mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits); - int -mpn_zero_p (mp_srcptr rp, mp_size_t n); - +mpn_zero_p(mp_srcptr rp, mp_size_t n); void mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode); + enum mpz_div_round_mode mode); void mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt); - int mpz_cmp (const mpz_t a, const mpz_t b); - void mpz_add (mpz_t r, const mpz_t a, const mpz_t b); - int mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index); - mp_bitcnt_t mpn_limb_size_in_base_2 (mp_limb_t u); - size_t mpz_sizeinbase (const mpz_t u, int base); - int mpz_sgn (const mpz_t u); - mp_bitcnt_t mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux); - + mp_limb_t ux); mp_bitcnt_t mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit); - mp_bitcnt_t mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit); - mp_bitcnt_t mpz_make_odd (mpz_t r); - void mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d); - void mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index); - void mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index); - void mpz_setbit (mpz_t d, mp_bitcnt_t bit_index); - void mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d); - int mpz_cmpabs (const mpz_t u, const mpz_t v); - void mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v); - void mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v); unsigned mpn_base_power_of_two_p (unsigned b); - void mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b); - -int isspace_gpu (unsigned char c); - -int strlen_c (__global char *c); - +int isspace_gpu(unsigned char c); +int strlen_c(__global char *c); mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits); - + unsigned bits); mp_size_t mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info); + mp_limb_t b, const struct mpn_base_info *info); int mpz_set_str (mpz_t r, __global char *sp, int base); - int mpz_init_set_str (mpz_t r, __global char *sp, int base); -// void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); -////void mpz_add (mpz_t, const mpz_t, const mpz_t); - void mpz_abs (mpz_t, const mpz_t); void mpz_neg (mpz_t, const mpz_t); - void mpz_swap (mpz_t, mpz_t); -// void mpz_mod (mpz_t, const mpz_t, const mpz_t); -// -////int mpz_sgn (const mpz_t); -// -////void mpz_mul (mpz_t, const mpz_t, const mpz_t); -// void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -// -// void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t); -////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t); -// void mpz_addmul (mpz_t, const mpz_t, const mpz_t); -// -// int mpz_tstbit (const mpz_t, mp_bitcnt_t); -// -// int mpz_cmp_ui (const mpz_t u, unsigned long v); -// -void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t - dn); - -// -// mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t); +void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn); void mpz_set_lg (unsigned long *r, __global unsigned long *x); @@ -542,22 +447,13 @@ mpz_set_lg (unsigned long *r, __global unsigned long *x); void mpz_init (mpz_t r) { - const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; - r->_mp_alloc = 0; r->_mp_size = 0; - - // memset(r->_mp_d, 0, 256); - - for (int i = 0; i < 256; i++) - { - r->_mp_d[i] = 0; - } - - // r->_mp_d = (mp_ptr) &dummy_limb; + for (int i = 0; i < 256; i++) { + r->_mp_d[i] = 0; + } } - void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) { @@ -566,51 +462,34 @@ mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) d[i] = s[i]; } - void mpz_set (mpz_t r, const mpz_t x) { /* Allow the NOP r == x */ if (r != x) - { - mp_size_t n; - mp_ptr rp; + { + mp_size_t n; + mp_ptr rp; - n = GMP_ABS (x->_mp_size); - rp = MPZ_REALLOC (r, n); + n = GMP_ABS (x->_mp_size); + rp = MPZ_REALLOC (r, n); - mpn_copyi (rp, x->_mp_d, n); - r->_mp_size = x->_mp_size; - } + mpn_copyi (rp, x->_mp_d, n); + r->_mp_size = x->_mp_size; + } } - void mpz_set_lg (unsigned long *r, __global unsigned long *x) { - - - // event_t wait; - - // wait = async_work_group_strided_copy(r,x, 256 + 2, 4,0); - - - // wait_group_events(0,&wait); - - r[0] = x[0]; - r[1] = x[1]; - - for (int i = 2; i < 256; i++) - { - - r[i] = x[i]; - - } - - // printf((__constant char *)"%i\n",r->_mp_size); - - // memcpy(r->_mp_d,(*(mpz_t *)x)->_mp_d,256); - + r[0] = x[0]; + r[1] = x[1]; + + for (int i = 2; i < 256; i++) { + + r[i] = x[i]; + + } } @@ -618,19 +497,19 @@ void mpz_set_ui (mpz_t r, unsigned long int x) { if (x > 0) - { - r->_mp_size = 1; - MPZ_REALLOC (r, 1)[0] = x; - if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + r->_mp_size = 1; + MPZ_REALLOC (r, 1)[0] = x; + if (GMP_LIMB_BITS < GMP_ULONG_BITS) { int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; while (x >>= LOCAL_GMP_LIMB_BITS) - { - ++r->_mp_size; - MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; - } + { + ++ r->_mp_size; + MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; + } + } } - } else r->_mp_size = 0; } @@ -650,19 +529,18 @@ mpz_set_si (mpz_t r, signed long int x) if (x >= 0) mpz_set_ui (r, x); else /* (x < 0) */ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); mpz_neg (r, r); - } + } else - { - r->_mp_size = -1; - MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); - } + { + r->_mp_size = -1; + MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); + } } - void mpz_init_set_si (mpz_t r, signed long int x) { @@ -678,7 +556,6 @@ mpz_init_set (mpz_t r, const mpz_t x) mpz_set (r, x); } - void mpz_init2 (mpz_t r, mp_bitcnt_t bits) { @@ -689,10 +566,8 @@ mpz_init2 (mpz_t r, mp_bitcnt_t bits) r->_mp_alloc = rn; r->_mp_size = 0; - // r->_mp_d = gmp_alloc_limbs (rn); } - void mpz_init_set_ui (mpz_t r, unsigned long int x) { @@ -700,32 +575,28 @@ mpz_init_set_ui (mpz_t r, unsigned long int x) mpz_set_ui (r, x); } - void mpz_clear (mpz_t r) { - // if (r->_mp_alloc) - // gmp_free_limbs (r->_mp_d, r->_mp_alloc); + //if (r->_mp_alloc) + //gmp_free_limbs (r->_mp_d, r->_mp_alloc); } void gmp_die (const char *msg) { - // fprintf (stderr, "%s\n", msg); - // abort(); + //fprintf (stderr, "%s\n", msg); + //abort(); } - -mp_size_t -mpn_normalized_size (mp_srcptr xp, mp_size_t n) +mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n) { - while (n > 0 && xp[n - 1] == 0) + while (n > 0 && xp[n-1] == 0) --n; return n; } - void mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) { @@ -735,7 +606,6 @@ mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) mpz_clear (bb); } - void mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) { @@ -751,7 +621,6 @@ mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) mpz_neg (r, r); } - int mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) { @@ -764,25 +633,23 @@ mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); } - unsigned long int mpz_get_ui (const mpz_t u) { if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - unsigned long r = 0; - mp_size_t n = GMP_ABS (u->_mp_size); - n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); - while (--n >= 0) - r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; - return r; - } + { + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; + unsigned long r = 0; + mp_size_t n = GMP_ABS (u->_mp_size); + n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); + while (--n >= 0) + r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; + return r; + } return u->_mp_size == 0 ? 0 : u->_mp_d[0]; } - int mpz_cmpabs_ui (const mpz_t u, unsigned long v) { @@ -791,13 +658,12 @@ mpz_cmpabs_ui (const mpz_t u, unsigned long v) if (! mpn_absfits_ulong_p (u->_mp_d, un)) return 1; else - { - unsigned long uu = mpz_get_ui (u); - return GMP_CMP (uu, v); - } + { + unsigned long uu = mpz_get_ui (u); + return GMP_CMP(uu, v); + } } - mp_limb_t mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) { @@ -807,19 +673,18 @@ mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) i = 0; do - { - mp_limb_t a = ap[i]; - /* Carry out */ - mp_limb_t cy = a < b; - rp[i] = a - b; - b = cy; - } + { + mp_limb_t a = ap[i]; + /* Carry out */ + mp_limb_t cy = a < b; + rp[i] = a - b; + b = cy; + } while (++i < n); return b; } - mp_limb_t mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) { @@ -827,18 +692,17 @@ mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) mp_limb_t cy; for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b; - a = ap[i]; b = bp[i]; - b += cy; - cy = (b < cy); - cy += (a < b); - rp[i] = a - b; - } + { + mp_limb_t a, b; + a = ap[i]; b = bp[i]; + b += cy; + cy = (b < cy); + cy += (a < b); + rp[i] = a - b; + } return cy; } - mp_limb_t mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) { @@ -893,16 +757,16 @@ mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) p = (mp_limb_t) qh * ul; /* Adjustment steps taken from udiv_qrnnd_c */ if (r < p) - { - qh--; - r += u1; - if (r >= u1) /* i.e. we didn't get carry when adding to r */ - if (r < p) + { + qh--; + r += u1; + if (r >= u1) /* i.e. we didn't get carry when adding to r */ + if (r < p) { qh--; r += u1; } - } + } r -= p; /* Low half of the quotient is @@ -921,70 +785,68 @@ mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) - { - ql--; - r += u1; - } + { + ql--; + r += u1; + } m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; if (r >= u1) - { - m++; - r -= u1; - } + { + m++; + r -= u1; + } } /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a 3/2 inverse. */ if (u0 > 0) - { - mp_limb_t th, tl; - r = ~r; - r += u0; - if (r < u0) + { + mp_limb_t th, tl; + r = ~r; + r += u0; + if (r < u0) { m--; if (r >= u1) - { - m--; - r -= u1; - } + { + m--; + r -= u1; + } r -= u1; } - gmp_umul_ppmm (th, tl, u0, m); - r += th; - if (r < th) + gmp_umul_ppmm (th, tl, u0, m); + r += th; + if (r < th) { m--; m -= ((r > u1) | ((r == u1) & (tl > u0))); } - } + } return m; } - int mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) + const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) { mp_size_t ns, ds, nn, dn, qs; ns = n->_mp_size; ds = d->_mp_size; - if (ds == 0) - { - + if (ds == 0) { + } - // gmp_die("mpz_div_qr: Divide by zero."); + //gmp_die("mpz_div_qr: Divide by zero."); if (ns == 0) - { - if (q) - q->_mp_size = 0; - if (r) - r->_mp_size = 0; - return 0; - } + { + if (q) + q->_mp_size = 0; + if (r) + r->_mp_size = 0; + return 0; + } nn = GMP_ABS (ns); dn = GMP_ABS (ds); @@ -992,8 +854,8 @@ mpz_div_qr (mpz_t q, mpz_t r, qs = ds ^ ns; if (nn < dn) - { - if (mode == GMP_DIV_CEIL && qs >= 0) + { + if (mode == GMP_DIV_CEIL && qs >= 0) { /* q = 1, r = n - d */ if (r) @@ -1001,7 +863,7 @@ mpz_div_qr (mpz_t q, mpz_t r, if (q) mpz_set_ui (q, 1); } - else if (mode == GMP_DIV_FLOOR && qs < 0) + else if (mode == GMP_DIV_FLOOR && qs < 0) { /* q = -1, r = n + d */ if (r) @@ -1009,7 +871,7 @@ mpz_div_qr (mpz_t q, mpz_t r, if (q) mpz_set_si (q, -1); } - else + else { /* q = 0, r = d */ if (r) @@ -1017,46 +879,46 @@ mpz_div_qr (mpz_t q, mpz_t r, if (q) q->_mp_size = 0; } - return 1; - } + return 1; + } else - { - mp_ptr np, qp; - mp_size_t qn, rn; - mpz_t tq, tr; + { + mp_ptr np, qp; + mp_size_t qn, rn; + mpz_t tq, tr; - mpz_init_set (tr, n); - np = tr->_mp_d; + mpz_init_set (tr, n); + np = tr->_mp_d; - qn = nn - dn + 1; + qn = nn - dn + 1; - if (q) + if (q) { mpz_init2 (tq, qn * GMP_LIMB_BITS); qp = tq->_mp_d; } - else - qp = NULL; + else + qp = NULL; - mpn_div_qr (qp, np, nn, d->_mp_d, dn); + mpn_div_qr (qp, np, nn, d->_mp_d, dn); - if (qp) + if (qp) { - qn -= (qp[qn - 1] == 0); + qn -= (qp[qn-1] == 0); tq->_mp_size = qs < 0 ? -qn : qn; } - rn = mpn_normalized_size (np, dn); - tr->_mp_size = ns < 0 ? -rn : rn; + rn = mpn_normalized_size (np, dn); + tr->_mp_size = ns < 0 ? - rn : rn; - if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) + if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) { if (q) mpz_sub_ui (tq, tq, 1); if (r) mpz_add (tr, tr, d); } - else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) + else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) { if (q) mpz_add_ui (tq, tq, 1); @@ -1064,48 +926,45 @@ mpz_div_qr (mpz_t q, mpz_t r, mpz_sub (tr, tr, d); } - if (q) + if (q) { mpz_swap (tq, q); mpz_clear (tq); } - if (r) - mpz_swap (tr, r); + if (r) + mpz_swap (tr, r); - mpz_clear (tr); + mpz_clear (tr); - return rn != 0; - } + return rn != 0; + } } - void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) { struct gmp_div_inverse inv; - // mp_ptr tp = NULL; - - mpz_t tp; - + // mp_ptr tp = NULL; + mpz_t tp; + + + assert (dn > 0); assert (nn >= dn); mpn_div_qr_invert (&inv, dp, dn); if (dn > 2 && inv.shift > 0) - { - // tp = gmp_alloc_limbs (dn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift)); - dp = tp->_mp_d; - } + { + //tp = gmp_alloc_limbs (dn); + gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift)); + dp = tp->_mp_d; + } mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv); - if (tp) - { - } - // gmp_free_limbs (tp, dn); + if (tp) {} + //gmp_free_limbs (tp, dn); } - void mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) { @@ -1116,30 +975,27 @@ mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) mpz_clear (t); } - void mpz_swap (mpz_t u, mpz_t v) { - // MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); - // MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); - - mpz_t temp; - mpz_init (temp); - - *temp = *u; - *u = *v; - *v = *temp; - + //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); + //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); + + mpz_t temp; + mpz_init(temp); + + *temp = *u; + *u = *v; + *v = *temp; + } - void mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) { mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL); } - void mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) { @@ -1152,10 +1008,9 @@ mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) inv->di = mpn_invert_limb (inv->d1); } - void mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0) + mp_limb_t d1, mp_limb_t d0) { unsigned shift; @@ -1163,19 +1018,18 @@ mpn_div_qr_2_invert (struct gmp_div_inverse *inv, gmp_clz (shift, d1); inv->shift = shift; if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 <<= shift; - } + { + d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); + d0 <<= shift; + } inv->d1 = d1; inv->d0 = d0; inv->di = mpn_invert_3by2 (d1, d0); } - void mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn) + mp_srcptr dp, mp_size_t dn) { assert (dn > 0); @@ -1184,24 +1038,24 @@ mpn_div_qr_invert (struct gmp_div_inverse *inv, else if (dn == 2) mpn_div_qr_2_invert (inv, dp[1], dp[0]); else - { - unsigned shift; - mp_limb_t d1, d0; - - d1 = dp[dn - 1]; - d0 = dp[dn - 2]; - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) + { + unsigned shift; + mp_limb_t d1, d0; + + d1 = dp[dn-1]; + d0 = dp[dn-2]; + assert (d1 > 0); + gmp_clz (shift, d1); + inv->shift = shift; + if (shift > 0) { d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 = (d0 << shift) | (dp[dn - 3] >> (GMP_LIMB_BITS - shift)); + d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift)); + } + inv->d1 = d1; + inv->d0 = d0; + inv->di = mpn_invert_3by2 (d1, d0); } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); - } } @@ -1216,19 +1070,17 @@ mpz_cmp_ui (const mpz_t u, unsigned long v) return mpz_cmpabs_ui (u, v); } - int mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) { while (--n >= 0) - { - if (ap[n] != bp[n]) - return ap[n] > bp[n] ? 1 : -1; - } + { + if (ap[n] != bp[n]) + return ap[n] > bp[n] ? 1 : -1; + } return 0; } - mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) { @@ -1249,17 +1101,16 @@ mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) high_limb = (low_limb << cnt); while (--n != 0) - { - low_limb = *--up; - *--rp = high_limb | (low_limb >> tnc); - high_limb = (low_limb << cnt); - } + { + low_limb = *--up; + *--rp = high_limb | (low_limb >> tnc); + high_limb = (low_limb << cnt); + } *--rp = high_limb; return retval; } - mp_limb_t mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) { @@ -1277,17 +1128,16 @@ mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) low_limb = high_limb >> cnt; while (--n != 0) - { - high_limb = *up++; - *rp++ = low_limb | (high_limb << tnc); - low_limb = high_limb >> cnt; - } + { + high_limb = *up++; + *rp++ = low_limb | (high_limb << tnc); + low_limb = high_limb >> cnt; + } *rp = low_limb; return retval; } - int mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) { @@ -1304,28 +1154,27 @@ mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) invertible = (mpz_cmp_ui (g, 1) == 0); if (invertible) - { - if (tr->_mp_size < 0) + { + if (tr->_mp_size < 0) { if (m->_mp_size >= 0) mpz_add (tr, tr, m); else mpz_sub (tr, tr, m); } - mpz_swap (r, tr); - } + mpz_swap (r, tr); + } mpz_clear (g); mpz_clear (tr); return invertible; } - /* Not matching current public gmp interface, rather corresponding to the sbpi1_div_* functions. */ mp_limb_t mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) + const struct gmp_div_inverse *inv) { mp_limb_t d, di; mp_limb_t r; @@ -1333,38 +1182,37 @@ mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_size_t tn = 0; if (inv->shift > 0) - { - /* Shift, reusing qp area if possible. In-place shift if qp == np. */ - tp = qp; - if (! tp) { + /* Shift, reusing qp area if possible. In-place shift if qp == np. */ + tp = qp; + if (!tp) + { tn = nn; - + // tp = gmp_alloc_limbs (tn); + } + r = mpn_lshift (tp, np, nn, inv->shift); + np = tp; } - r = mpn_lshift (tp, np, nn, inv->shift); - np = tp; - } else r = 0; d = inv->d1; di = inv->di; while (--nn >= 0) - { - mp_limb_t q; + { + mp_limb_t q; - gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); - if (qp) - qp[nn] = q; - } - // if (tn) - // gmp_free_limbs (tp, tn); + gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); + if (qp) + qp[nn] = q; + } + //if (tn) + //gmp_free_limbs (tp, tn); return r >> inv->shift; } - mp_limb_t mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) { @@ -1372,22 +1220,21 @@ mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) mp_limb_t cy; for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b, r; - a = ap[i]; b = bp[i]; - r = a + cy; - cy = (r < cy); - r += b; - cy += (r < b); - rp[i] = r; - } + { + mp_limb_t a, b, r; + a = ap[i]; b = bp[i]; + r = a + cy; + cy = (r < cy); + r += b; + cy += (r < b); + rp[i] = r; + } return cy; } - void mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) + const struct gmp_div_inverse *inv) { unsigned shift; mp_size_t i; @@ -1408,28 +1255,27 @@ mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, i = nn - 2; do - { - mp_limb_t n0, q; - n0 = np[i]; - gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); + { + mp_limb_t n0, q; + n0 = np[i]; + gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); - if (qp) - qp[i] = q; - } + if (qp) + qp[i] = q; + } while (--i >= 0); if (shift > 0) - { - assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); - r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); - r1 >>= shift; - } + { + assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); + r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); + r1 >>= shift; + } np[1] = r1; np[0] = r0; } - mp_limb_t mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) { @@ -1439,29 +1285,28 @@ mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) cl = 0; do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); - lpl += cl; - cl = (lpl < cl) + hpl; + lpl += cl; + cl = (lpl < cl) + hpl; - rl = *rp; - lpl = rl - lpl; - cl += lpl > rl; - *rp++ = lpl; - } + rl = *rp; + lpl = rl - lpl; + cl += lpl > rl; + *rp++ = lpl; + } while (--n != 0); return cl; } - void mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv) + mp_ptr np, mp_size_t nn, mp_limb_t n1, + mp_srcptr dp, mp_size_t dn, + mp_limb_t dinv) { mp_size_t i; @@ -1484,47 +1329,46 @@ mpn_div_qr_pi1 (mp_ptr qp, i = nn - dn; do - { - mp_limb_t n0 = np[dn - 1 + i]; + { + mp_limb_t n0 = np[dn-1+i]; - if (n1 == d1 && n0 == d0) + if (n1 == d1 && n0 == d0) { q = GMP_LIMB_MAX; - mpn_submul_1 (np + i, dp, dn, q); - n1 = np[dn - 1 + i]; /* update n1, last loop's value will now be invalid */ + mpn_submul_1 (np+i, dp, dn, q); + n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */ } - else + else { - gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn - 2 + i], d1, d0, dinv); + gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv); - cy = mpn_submul_1 (np + i, dp, dn - 2, q); + cy = mpn_submul_1 (np + i, dp, dn-2, q); cy1 = n0 < cy; n0 = n0 - cy; cy = n1 < cy1; n1 = n1 - cy1; - np[dn - 2 + i] = n0; + np[dn-2+i] = n0; if (cy != 0) - { - n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); - q--; - } + { + n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); + q--; + } } - if (qp) - qp[i] = q; - } + if (qp) + qp[i] = q; + } while (--i >= 0); np[dn - 1] = n1; } - void mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv) + mp_srcptr dp, mp_size_t dn, + const struct gmp_div_inverse *inv) { assert (dn > 0); assert (nn >= dn); @@ -1534,28 +1378,27 @@ mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, else if (dn == 2) mpn_div_qr_2_preinv (qp, np, nn, inv); else - { - mp_limb_t nh; - unsigned shift; + { + mp_limb_t nh; + unsigned shift; - assert (inv->d1 == dp[dn - 1]); - assert (inv->d0 == dp[dn - 2]); - assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); + assert (inv->d1 == dp[dn-1]); + assert (inv->d0 == dp[dn-2]); + assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); - shift = inv->shift; - if (shift > 0) - nh = mpn_lshift (np, np, nn, shift); - else - nh = 0; + shift = inv->shift; + if (shift > 0) + nh = mpn_lshift (np, np, nn, shift); + else + nh = 0; - mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); + mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); - if (shift > 0) - gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); - } + if (shift > 0) + gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); + } } - void mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) { @@ -1565,110 +1408,105 @@ mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) mp_srcptr mp; struct gmp_div_inverse minv; unsigned shift; - // mp_ptr tp = NULL; - mpz_t tp; - - // mpz_init(tp); + //mp_ptr tp = NULL; + mpz_t tp; + + //mpz_init(tp); en = GMP_ABS (e->_mp_size); mn = GMP_ABS (m->_mp_size); - if (mn == 0) - { - } - // gmp_die ("mpz_powm: Zero modulo."); + if (mn == 0) {} + //gmp_die ("mpz_powm: Zero modulo."); if (en == 0) - { - mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); - return; - } + { + mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); + return; + } mp = m->_mp_d; mpn_div_qr_invert (&minv, mp, mn); shift = minv.shift; if (shift > 0) - { - /* To avoid shifts, we do all our reductions, except the final - one, using a *normalized* m. */ - minv.shift = 0; + { + /* To avoid shifts, we do all our reductions, except the final + one, using a *normalized* m. */ + minv.shift = 0; - // tp = gmp_alloc_limbs (mn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift)); - mp = tp->_mp_d; - } + // tp = gmp_alloc_limbs (mn); + gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift)); + mp = tp->_mp_d; + } mpz_init (base); if (e->_mp_size < 0) - { - if (! mpz_invert (base, b, m)) { + if (!mpz_invert (base, b, m)) {} + //gmp_die ("mpz_powm: Negative exponent and non-invertible base."); } - // gmp_die ("mpz_powm: Negative exponent and non-invertible base."); - } else - { - mp_size_t bn; - mpz_abs (base, b); + { + mp_size_t bn; + mpz_abs (base, b); - bn = base->_mp_size; - if (bn >= mn) + bn = base->_mp_size; + if (bn >= mn) { mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv); bn = mn; } - /* We have reduced the absolute value. Now take care of the - sign. Note that we get zero represented non-canonically as - m. */ - if (b->_mp_size < 0) + /* We have reduced the absolute value. Now take care of the + sign. Note that we get zero represented non-canonically as + m. */ + if (b->_mp_size < 0) { mp_ptr bp = MPZ_REALLOC (base, mn); gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn)); bn = mn; } - base->_mp_size = mpn_normalized_size (base->_mp_d, bn); - } + base->_mp_size = mpn_normalized_size (base->_mp_d, bn); + } mpz_init_set_ui (tr, 1); while (--en >= 0) - { - mp_limb_t w = e->_mp_d[en]; - mp_limb_t bit; + { + mp_limb_t w = e->_mp_d[en]; + mp_limb_t bit; - bit = GMP_LIMB_HIGHBIT; - do + bit = GMP_LIMB_HIGHBIT; + do { mpz_mul (tr, tr, tr); if (w & bit) mpz_mul (tr, tr, base); if (tr->_mp_size > mn) - { - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } + { + mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); + tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); + } bit >>= 1; } - while (bit > 0); - } + while (bit > 0); + } /* Final reduction */ if (tr->_mp_size >= mn) - { - minv.shift = shift; - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - // if (tp) - // gmp_free_limbs (tp, mn); + { + minv.shift = shift; + mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); + tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); + } + //if (tp) + //gmp_free_limbs (tp, mn); mpz_swap (r, tr); mpz_clear (tr); mpz_clear (base); } - int mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) { @@ -1689,22 +1527,21 @@ mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b) cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn); if (cmp > 0) - { - rp = MPZ_REALLOC (r, an); - gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); - return mpn_normalized_size (rp, an); - } + { + rp = MPZ_REALLOC (r, an); + gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); + return mpn_normalized_size (rp, an); + } else if (cmp < 0) - { - rp = MPZ_REALLOC (r, bn); - gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); - return -mpn_normalized_size (rp, bn); - } + { + rp = MPZ_REALLOC (r, bn); + gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); + return -mpn_normalized_size (rp, bn); + } else return 0; } - mp_limb_t mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) { @@ -1713,12 +1550,12 @@ mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) assert (n > 0); i = 0; do - { - mp_limb_t r = ap[i] + b; - /* Carry out */ - b = (r < b); - rp[i] = r; - } + { + mp_limb_t r = ap[i] + b; + /* Carry out */ + b = (r < b); + rp[i] = r; + } while (++i < n); return b; @@ -1738,7 +1575,6 @@ mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) return cy; } - mp_size_t mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) { @@ -1748,10 +1584,10 @@ mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) mp_limb_t cy; if (an < bn) - { - MPZ_SRCPTR_SWAP (a, b); - MP_SIZE_T_SWAP (an, bn); - } + { + MPZ_SRCPTR_SWAP (a, b); + MP_SIZE_T_SWAP (an, bn); + } rp = MPZ_REALLOC (r, an + 1); cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); @@ -1761,7 +1597,6 @@ mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) return an + cy; } - void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) { @@ -1772,10 +1607,9 @@ mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) else rn = mpz_abs_add (r, a, b); - r->_mp_size = a->_mp_size >= 0 ? rn : -rn; + r->_mp_size = a->_mp_size >= 0 ? rn : - rn; } - mp_limb_t mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) { @@ -1785,24 +1619,23 @@ mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) cl = 0; do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); - lpl += cl; - cl = (lpl < cl) + hpl; + lpl += cl; + cl = (lpl < cl) + hpl; - rl = *rp; - lpl = rl + lpl; - cl += lpl < rl; - *rp++ = lpl; - } + rl = *rp; + lpl = rl + lpl; + cl += lpl < rl; + *rp++ = lpl; + } while (--n != 0); return cl; } - mp_limb_t mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) { @@ -1812,15 +1645,15 @@ mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) cl = 0; do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); - lpl += cl; - cl = (lpl < cl) + hpl; + lpl += cl; + cl = (lpl < cl) + hpl; - *rp++ = lpl; - } + *rp++ = lpl; + } while (--n != 0); return cl; @@ -1832,8 +1665,8 @@ mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) { assert (un >= vn); assert (vn >= 1); - assert (! GMP_MPN_OVERLAP_P (rp, un + vn, up, un)); - assert (! GMP_MPN_OVERLAP_P (rp, un + vn, vp, vn)); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); /* We first multiply by the low order limb. This result can be stored, not added, to rp. We also avoid a loop for zeroing this @@ -1845,10 +1678,10 @@ mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) vp[]. */ while (--vn >= 1) - { - rp += 1, vp += 1; - rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); - } + { + rp += 1, vp += 1; + rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); + } return rp[un]; } @@ -1865,10 +1698,10 @@ mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) vn = v->_mp_size; if (un == 0 || vn == 0) - { - r->_mp_size = 0; - return; - } + { + r->_mp_size = 0; + return; + } sign = (un ^ vn) < 0; @@ -1884,14 +1717,13 @@ mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un); rn = un + vn; - rn -= tp[rn - 1] == 0; + rn -= tp[rn-1] == 0; - t->_mp_size = sign ? -rn : rn; + t->_mp_size = sign ? - rn : rn; mpz_swap (r, t); mpz_clear (t); } - void mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) { @@ -1899,7 +1731,6 @@ mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) d[n] = s[n]; } - void mpn_zero (mp_ptr rp, mp_size_t n) { @@ -1918,10 +1749,10 @@ mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) un = GMP_ABS (u->_mp_size); if (un == 0) - { - r->_mp_size = 0; - return; - } + { + r->_mp_size = 0; + return; + } limbs = bits / GMP_LIMB_BITS; shift = bits % GMP_LIMB_BITS; @@ -1929,22 +1760,21 @@ mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) rn = un + limbs + (shift > 0); rp = MPZ_REALLOC (r, rn); if (shift > 0) - { - mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); - rp[rn - 1] = cy; - rn -= (cy == 0); - } + { + mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); + rp[rn-1] = cy; + rn -= (cy == 0); + } else mpn_copyd (rp + limbs, u->_mp_d, un); mpn_zero (rp, limbs); - r->_mp_size = (u->_mp_size < 0) ? -rn : rn; + r->_mp_size = (u->_mp_size < 0) ? - rn : rn; } - int -mpn_zero_p (mp_srcptr rp, mp_size_t n) +mpn_zero_p(mp_srcptr rp, mp_size_t n) { return mpn_normalized_size (rp, n) == 0; } @@ -1952,7 +1782,7 @@ mpn_zero_p (mp_srcptr rp, mp_size_t n) void mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode) + enum mpz_div_round_mode mode) { mp_size_t un, qn; mp_size_t limb_cnt; @@ -1961,10 +1791,10 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, un = u->_mp_size; if (un == 0) - { - q->_mp_size = 0; - return; - } + { + q->_mp_size = 0; + return; + } limb_cnt = bit_index / GMP_LIMB_BITS; qn = GMP_ABS (un) - limb_cnt; bit_index %= GMP_LIMB_BITS; @@ -1973,28 +1803,28 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, /* Note: Below, the final indexing at limb_cnt is valid because at that point we have qn > 0. */ adjust = (qn <= 0 - || ! mpn_zero_p (u->_mp_d, limb_cnt) - || (u->_mp_d[limb_cnt] - & (((mp_limb_t) 1 << bit_index) - 1))); + || !mpn_zero_p (u->_mp_d, limb_cnt) + || (u->_mp_d[limb_cnt] + & (((mp_limb_t) 1 << bit_index) - 1))); else adjust = 0; if (qn <= 0) qn = 0; else - { - qp = MPZ_REALLOC (q, qn); + { + qp = MPZ_REALLOC (q, qn); - if (bit_index != 0) + if (bit_index != 0) { mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index); qn -= qp[qn - 1] == 0; } - else + else { mpn_copyi (qp, u->_mp_d + limb_cnt, qn); } - } + } q->_mp_size = qn; @@ -2004,14 +1834,12 @@ mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, mpz_neg (q, q); } - void mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) { mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC); } - int mpz_cmp (const mpz_t a, const mpz_t b) { @@ -2026,7 +1854,6 @@ mpz_cmp (const mpz_t a, const mpz_t b) return mpn_cmp (b->_mp_d, a->_mp_d, -asize); } - void mpz_add (mpz_t r, const mpz_t a, const mpz_t b) { @@ -2037,7 +1864,7 @@ mpz_add (mpz_t r, const mpz_t a, const mpz_t b) else rn = mpz_abs_sub (r, a, b); - r->_mp_size = a->_mp_size >= 0 ? rn : -rn; + r->_mp_size = a->_mp_size >= 0 ? rn : - rn; } @@ -2062,19 +1889,18 @@ mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) bit = (w >> shift) & 1; if (ds < 0) - { - /* d < 0. Check if any of the bits below is set: If so, our bit - must be complemented. */ - if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) + { + /* d < 0. Check if any of the bits below is set: If so, our bit + must be complemented. */ + if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) + return bit ^ 1; + while (--limb_index >= 0) + if (d->_mp_d[limb_index] > 0) return bit ^ 1; - while (--limb_index >= 0) - if (d->_mp_d[limb_index] > 0) - return bit ^ 1; - } + } return bit; } - mp_bitcnt_t mpn_limb_size_in_base_2 (mp_limb_t u) { @@ -2085,23 +1911,22 @@ mpn_limb_size_in_base_2 (mp_limb_t u) return GMP_LIMB_BITS - shift; } - size_t mpz_sizeinbase (const mpz_t u, int base) { mp_size_t un, tn; mp_srcptr up; - // mp_ptr tp; - mpz_t tp; - + //mp_ptr tp; + mpz_t tp; + mp_bitcnt_t bits; struct gmp_div_inverse bi; size_t ndigits; - - mpz_init (tp); + + mpz_init(tp); assert (base >= 2); - assert (base <= 62); +assert (base <= 62); un = GMP_ABS (u->_mp_size); if (un == 0) @@ -2109,71 +1934,68 @@ mpz_sizeinbase (const mpz_t u, int base) up = u->_mp_d; - bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un - 1]); + bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]); switch (base) - { - case 2: - return bits; - case 4: - return (bits + 1) / 2; - case 8: - return (bits + 2) / 3; - case 16: - return (bits + 3) / 4; - case 32: - return (bits + 4) / 5; - /* FIXME: Do something more clever for the common case of base - 10. */ - } - - // tp = gmp_alloc_limbs (un); + { + case 2: + return bits; + case 4: + return (bits + 1) / 2; + case 8: + return (bits + 2) / 3; + case 16: + return (bits + 3) / 4; + case 32: + return (bits + 4) / 5; + /* FIXME: Do something more clever for the common case of base + 10. */ + } + //tp = gmp_alloc_limbs (un); + mpn_copyi (tp->_mp_d, up, un); mpn_div_qr_1_invert (&bi, base); tn = un; ndigits = 0; do - { - ndigits++; - mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi); - tn -= (tp->_mp_d[tn - 1] == 0); - } + { + ndigits++; + mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi); + tn -= (tp->_mp_d[tn-1] == 0); + } while (tn > 0); - // gmp_free_limbs (tp, un); + //gmp_free_limbs (tp, un); return ndigits; } - int mpz_sgn (const mpz_t u) { return GMP_CMP (u->_mp_size, 0); } - mp_bitcnt_t mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux) + mp_limb_t ux) { unsigned cnt; assert (ux == 0 || ux == GMP_LIMB_MAX); - assert (0 <= i && i <= un); + assert (0 <= i && i <= un ); while (limb == 0) - { - i++; - if (i == un) - return (ux == 0 ? ~(mp_bitcnt_t) 0 : un *GMP_LIMB_BITS); - limb = ux ^ up[i]; - } + { + i++; + if (i == un) + return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); + limb = ux ^ up[i]; + } gmp_ctz (cnt, limb); return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; } - void mpz_abs (mpz_t r, const mpz_t u) { @@ -2188,11 +2010,10 @@ mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) mp_size_t i; i = bit / GMP_LIMB_BITS; - return mpn_common_scan (ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), - i, ptr, i, 0); + return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), + i, ptr, i, 0); } - mp_bitcnt_t mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit) { @@ -2214,17 +2035,17 @@ mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit) limb = up[i]; if (starting_bit != 0) - { - if (us < 0) + { + if (us < 0) { ux = mpn_zero_p (up, i); - limb = ~limb + ux; - ux = -(mp_limb_t) (limb >= ux); + limb = ~ limb + ux; + ux = - (mp_limb_t) (limb >= ux); } - /* Mask to 0 all bits before starting_bit, thus ignoring them. */ - limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - } + /* Mask to 0 all bits before starting_bit, thus ignoring them. */ + limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); + } return mpn_common_scan (limb, i, up, un, ux); } @@ -2243,14 +2064,12 @@ mpz_make_odd (mpz_t r) return shift; } - void mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) { mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC); } - void mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) { @@ -2264,35 +2083,34 @@ mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); if (limb_index >= dn) - { - mp_size_t i; - /* The bit should be set outside of the end of the number. - We have to increase the size of the number. */ - dp = MPZ_REALLOC (d, limb_index + 1); - - dp[limb_index] = bit; - for (i = dn; i < limb_index; i++) - dp[i] = 0; - dn = limb_index + 1; - } + { + mp_size_t i; + /* The bit should be set outside of the end of the number. + We have to increase the size of the number. */ + dp = MPZ_REALLOC (d, limb_index + 1); + + dp[limb_index] = bit; + for (i = dn; i < limb_index; i++) + dp[i] = 0; + dn = limb_index + 1; + } else - { - mp_limb_t cy; + { + mp_limb_t cy; - dp = d->_mp_d; + dp = d->_mp_d; - cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); - if (cy > 0) + cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); + if (cy > 0) { dp = MPZ_REALLOC (d, dn + 1); dp[dn++] = cy; } - } + } - d->_mp_size = (d->_mp_size < 0) ? -dn : dn; + d->_mp_size = (d->_mp_size < 0) ? - dn : dn; } - void mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) { @@ -2309,32 +2127,29 @@ mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) assert (limb_index < dn); gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, - dn - limb_index, bit)); + dn - limb_index, bit)); dn = mpn_normalized_size (dp, dn); - d->_mp_size = (d->_mp_size < 0) ? -dn : dn; + d->_mp_size = (d->_mp_size < 0) ? - dn : dn; } - void mpz_setbit (mpz_t d, mp_bitcnt_t bit_index) { - if (! mpz_tstbit (d, bit_index)) - { - if (d->_mp_size >= 0) - mpz_abs_add_bit (d, bit_index); - else - mpz_abs_sub_bit (d, bit_index); - } + if (!mpz_tstbit (d, bit_index)) + { + if (d->_mp_size >= 0) + mpz_abs_add_bit (d, bit_index); + else + mpz_abs_sub_bit (d, bit_index); + } } - void mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d) { gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC)); } - #define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0]) #define mpz_even_p(z) (! mpz_odd_p (z)) @@ -2342,10 +2157,9 @@ int mpz_cmpabs (const mpz_t u, const mpz_t v) { return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size), - v->_mp_d, GMP_ABS (v->_mp_size)); + v->_mp_d, GMP_ABS (v->_mp_size)); } - void mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) { @@ -2354,28 +2168,28 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mp_bitcnt_t power; if (u->_mp_size == 0) - { - /* g = 0 u + sgn(v) v */ - signed long sign = mpz_sgn (v); - mpz_abs (g, v); - if (s) - s->_mp_size = 0; - if (t) - mpz_set_si (t, sign); - return; - } + { + /* g = 0 u + sgn(v) v */ + signed long sign = mpz_sgn (v); + mpz_abs (g, v); + if (s) + s->_mp_size = 0; + if (t) + mpz_set_si (t, sign); + return; + } if (v->_mp_size == 0) - { - /* g = sgn(u) u + 0 v */ - signed long sign = mpz_sgn (u); - mpz_abs (g, u); - if (s) - mpz_set_si (s, sign); - if (t) - t->_mp_size = 0; - return; - } + { + /* g = sgn(u) u + 0 v */ + signed long sign = mpz_sgn (u); + mpz_abs (g, u); + if (s) + mpz_set_si (s, sign); + if (t) + t->_mp_size = 0; + return; + } mpz_init (tu); mpz_init (tv); @@ -2388,19 +2202,19 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) uz = mpz_make_odd (tu); mpz_abs (tv, v); vz = mpz_make_odd (tv); - gz = GMP_MIN (uz, vz); +gz = GMP_MIN (uz, vz); uz -= gz; vz -= gz; /* Cofactors corresponding to odd gcd. gz handled later. */ if (tu->_mp_size < tv->_mp_size) - { - mpz_swap (tu, tv); - MPZ_SRCPTR_SWAP (u, v); - MPZ_PTR_SWAP (s, t); - MP_BITCNT_T_SWAP (uz, vz); - } + { + mpz_swap (tu, tv); + MPZ_SRCPTR_SWAP (u, v); + MPZ_PTR_SWAP (s, t); + MP_BITCNT_T_SWAP (uz, vz); + } /* Maintain * @@ -2432,13 +2246,13 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) power = uz + vz; if (tu->_mp_size > 0) - { - mp_bitcnt_t shift; - shift = mpz_make_odd (tu); - mpz_setbit (t0, uz + shift); - power += shift; + { + mp_bitcnt_t shift; + shift = mpz_make_odd (tu); + mpz_setbit (t0, uz + shift); + power += shift; - for (;;) + for (;;) { int c; c = mpz_cmp (tu, tv); @@ -2446,33 +2260,33 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) break; if (c < 0) - { - /* tv = tv' + tu - * - * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' - * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ - - mpz_sub (tv, tv, tu); - mpz_add (t0, t0, t1); - mpz_add (s0, s0, s1); - - shift = mpz_make_odd (tv); - mpz_mul_2exp (t1, t1, shift); - mpz_mul_2exp (s1, s1, shift); - } + { + /* tv = tv' + tu + * + * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' + * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ + + mpz_sub (tv, tv, tu); + mpz_add (t0, t0, t1); + mpz_add (s0, s0, s1); + + shift = mpz_make_odd (tv); + mpz_mul_2exp (t1, t1, shift); + mpz_mul_2exp (s1, s1, shift); + } else - { - mpz_sub (tu, tu, tv); - mpz_add (t1, t0, t1); - mpz_add (s1, s0, s1); + { + mpz_sub (tu, tu, tv); + mpz_add (t1, t0, t1); + mpz_add (s1, s0, s1); - shift = mpz_make_odd (tu); - mpz_mul_2exp (t0, t0, shift); - mpz_mul_2exp (s0, s0, shift); - } + shift = mpz_make_odd (tu); + mpz_mul_2exp (t0, t0, shift); + mpz_mul_2exp (s0, s0, shift); + } power += shift; } - } + } else mpz_setbit (t0, uz); @@ -2491,25 +2305,25 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) mpz_abs (t1, t1); while (power-- > 0) - { - /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ - if (mpz_odd_p (s0) || mpz_odd_p (t0)) + { + /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ + if (mpz_odd_p (s0) || mpz_odd_p (t0)) { mpz_sub (s0, s0, s1); mpz_add (t0, t0, t1); } - // assert (mpz_even_p (t0) && mpz_even_p (s0)); - mpz_tdiv_q_2exp (s0, s0, 1); - mpz_tdiv_q_2exp (t0, t0, 1); - } + //assert (mpz_even_p (t0) && mpz_even_p (s0)); + mpz_tdiv_q_2exp (s0, s0, 1); + mpz_tdiv_q_2exp (t0, t0, 1); + } /* Arrange so that |s| < |u| / 2g */ mpz_add (s1, s0, s1); if (mpz_cmpabs (s0, s1) > 0) - { - mpz_swap (s0, s1); - mpz_sub (t0, t0, t1); - } + { + mpz_swap (s0, s1); + mpz_sub (t0, t0, t1); + } if (u->_mp_size < 0) mpz_neg (s0, s0); if (v->_mp_size < 0) @@ -2541,26 +2355,6 @@ mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) } -// STRING CONVERSION - -unsigned -mpn_base_power_of_two_p (unsigned b) -{ - switch (b) - { - case 2: return 1; - case 4: return 2; - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - case 128: return 7; - case 256: return 8; - default: return 0; - } -} - - void mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) { @@ -2577,582 +2371,336 @@ mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) } -int -isspace_gpu (unsigned char c) -{ - if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v') - return 1; - return 0; -} - - -int -strlen_c (__global char *c) -{ - - // rather naive implementation – we assume a string is terminated, and is not 0 characters long. - - int i = 0; - while (1) - { - if (c[i] == '\0') - return i; - i++; - } - return i; -} - - -mp_size_t -mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits) -{ - mp_size_t rn; - mp_limb_t limb; - unsigned shift; - - for (limb = 0, rn = 0, shift = 0; sn-- > 0;) - { - limb |= (mp_limb_t) sp[sn] << shift; - shift += bits; - if (shift >= GMP_LIMB_BITS) - { - shift -= GMP_LIMB_BITS; - rp[rn++] = limb; - /* Next line is correct also if shift == 0, - bits == 8, and mp_limb_t == unsigned char. */ - limb = (unsigned int) sp[sn] >> (bits - shift); - } - } - if (limb != 0) - rp[rn++] = limb; - else - rn = mpn_normalized_size (rp, rn); - return rn; -} - - -mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info) -{ - mp_size_t rn; - mp_limb_t w; - unsigned k; - size_t j; - - assert (sn > 0); - - k = 1 + (sn - 1) % info->exp; - - j = 0; - w = sp[j++]; - while (--k != 0) - w = w * b + sp[j++]; - - rp[0] = w; - - for (rn = 1; j < sn;) - { - mp_limb_t cy; - - w = sp[j++]; - for (k = 1; k < info->exp; k++) - w = w * b + sp[j++]; - - cy = mpn_mul_1 (rp, rp, rn, info->bb); - cy += mpn_add_1 (rp, rp, rn, w); - if (cy > 0) - rp[rn++] = cy; - } - assert (j == sn); - - return rn; -} - - -int -mpz_set_str (mpz_t r, __global char *sp, int base) -{ - unsigned bits, value_of_a; - mp_size_t rn, alloc; - mp_ptr rp; - size_t dn, sn; - int sign; - unsigned char dp[256]; - - assert (base == 0 || (base >= 2 && base <= 62)); - - while (isspace_gpu ( (unsigned char) *sp)) - sp++; - - sign = (*sp == '-'); - sp += sign; - - if (base == 0) - { - if (sp[0] == '0') - { - if (sp[1] == 'x' || sp[1] == 'X') - { - base = 16; - sp += 2; - } - else if (sp[1] == 'b' || sp[1] == 'B') - { - base = 2; - sp += 2; - } - else - base = 8; - } - else - base = 10; - } - - if (! *sp) - { - r->_mp_size = 0; - return -1; - } - sn = strlen_c (sp); - // dp = (unsigned char *) gmp_alloc (sn); - - - value_of_a = (base > 36) ? 36 : 10; - for (dn = 0; *sp; sp++) - { - unsigned digit; - - if (isspace_gpu ((unsigned char) *sp)) - continue; - else if (*sp >= '0' && *sp <= '9') - digit = *sp - '0'; - else if (*sp >= 'a' && *sp <= 'z') - digit = *sp - 'a' + value_of_a; - else if (*sp >= 'A' && *sp <= 'Z') - digit = *sp - 'A' + 10; - else - digit = base; /* fail */ - - if (digit >= (unsigned) base) - { - // gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - - dp[dn++] = digit; - } - - if (! dn) - { - // gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - bits = mpn_base_power_of_two_p (base); - - if (bits > 0) - { - alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_bits (rp, dp, dn, bits); - } - else - { - struct mpn_base_info info; - mpn_get_base_info (&info, base); - alloc = (dn + info.exp - 1) / info.exp; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_other (rp, dp, dn, base, &info); - /* Normalization, needed for all-zero input. */ - assert (rn > 0); - rn -= rp[rn - 1] == 0; - } - assert (rn <= alloc); - // gmp_free (dp, sn); - - r->_mp_size = sign ? -rn : rn; - - return 0; -} - - -int -mpz_init_set_str (mpz_t r, __global char *sp, int base) -{ - mpz_init (r); - return mpz_set_str (r, sp, base); -} - - // Montgomery multiplication -void mont_prepare (mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x - ); - -void mont_product (mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ); - -void mont_modexp (mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x ); -void mont_finish (mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni ); -void mont_prepare_even_modulus (mpz_t m, mpz_t q, mpz_t powj); - -void mont_mulmod (mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); -void -mont_prepare_even_modulus (mpz_t m, mpz_t q, mpz_t powj) -{ - - mpz_t two; // powj == 2^j +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj); - mpz_init_set_ui (two, 2); +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); - mp_bitcnt_t j = mpz_scan1 (m, 0); - mpz_tdiv_q_2exp (q,m,j); - mpz_mul_2exp (powj,two,j - 1); - mpz_clear (two); +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) { + + mpz_t two; // powj == 2^j + + mpz_init_set_ui(two, 2); + + mp_bitcnt_t j = mpz_scan1(m, 0); + + mpz_tdiv_q_2exp(q,m,j); + mpz_mul_2exp(powj,two,j - 1); + + mpz_clear(two); + } - // CPU -void -mont_prepare (mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x) -{ - - - // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) - - // calculate r, which must be larger than the modulo and also a power of 2 - - mpz_t one, oo; // some helper variables - mpz_init_set_si (one,1); - mpz_init_set_si (oo,0); - - // unsigned long len = mpz_sizeinbase(m,2); - - unsigned long len = 2049; - - mpz_mul_2exp (r,one,len); - - mpz_set_si (one, 0); - - - mpz_gcdext (one, r_1, ni, r, m); // set r_1 and ni - - int sgn = mpz_sgn (r_1); - - mpz_abs (r_1, r_1); - mpz_abs (ni, ni); - - if (sgn == -1) - { - mpz_sub (ni, r, ni); - mpz_sub (r_1, m, r_1); - } - - if (mpz_cmp_ui (one, 1)) - assert (0); - - mpz_mul (one, r, r_1); - mpz_mul (oo,ni,m); - - mpz_sub (one, one, oo); // oo must be one - - if (mpz_cmp_ui (one, 1)) - assert (0); - - mpz_mul (M, b, r); - mpz_mod (M, M, m); // set M - - mpz_mod (x, r, m); // set x - - mpz_clear (one); - mpz_clear (oo); - +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x) { + + + // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) + + // calculate r, which must be larger than the modulo and also a power of 2 + + mpz_t one, oo; // some helper variables + mpz_init_set_si(one,1); + mpz_init_set_si(oo,0); + + //unsigned long len = mpz_sizeinbase(m,2); + + unsigned long len = 2049; + + mpz_mul_2exp(r,one,len); + + mpz_set_si(one, 0); + + + mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni + + int sgn = mpz_sgn(r_1); + + mpz_abs(r_1, r_1); + mpz_abs(ni, ni); + + if (sgn == -1) { + mpz_sub(ni, r, ni); + mpz_sub(r_1, m, r_1); + } + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(one, r, r_1); + mpz_mul(oo,ni,m); + + mpz_sub(one, one, oo); // oo must be one + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(M, b, r); + mpz_mod(M, M, m); // set M + + mpz_mod(x, r, m); // set x + + mpz_clear(one); + mpz_clear(oo); + } - // maybe GPU? // MARK: n MUST be an odd number -void -mont_modexp (mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) -{ - - mpz_t aa,xx; - - mpz_init_set (aa, M); - mpz_init_set (xx, a); - - int k = (int) mpz_sizeinbase (e,2); - - for (int i = k - 1; i >= 0; i--) - { - - mont_product (xx, xx, xx, r, r_1, n, ni); - - if (mpz_tstbit (e, i)) - mont_product (xx, aa, xx, r, r_1, n, ni); - - } - - mpz_set (ret, xx); - - mpz_clear (aa); - mpz_clear (xx); - -} - - -void -mont_finish (mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) -{ - - - mpz_t x,one; - - mpz_init (x); - mpz_init_set_ui (one, 1); - - mont_product (x, xx, one, r, r_1, n, ni); - - mpz_set (ret, x); - - mpz_clear (x); - mpz_clear (one); - +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + mpz_t aa,xx; + + mpz_init_set(aa, M); + mpz_init_set(xx, a); + + int k = (int)mpz_sizeinbase(e,2); + + for (int i = k - 1; i >= 0; i--) { + + mont_product(xx, xx, xx, r, r_1, n, ni); + + if (mpz_tstbit(e, i)) + mont_product(xx, aa, xx, r, r_1, n, ni); + + } + + mpz_set(ret, xx); + + mpz_clear(aa); + mpz_clear(xx); + +} + +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + + mpz_t x,one; + + mpz_init(x); + mpz_init_set_ui(one, 1); + + mont_product(x, xx, one, r, r_1, n, ni); + + mpz_set(ret, x); + + mpz_clear(x); + mpz_clear(one); + } // GPU -void -mont_product (mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ) -{ - - mpz_t t,m,u; - - mpz_init (t); - mpz_init (m); - mpz_init (u); - - - mont_mulmod (t, b, a, r); - - mont_mulmod (m, ni, t, r); - - mpz_t ab,mn; - - mpz_init (ab); - mpz_init (mn); - - mpz_mul (ab, a, b); - mpz_mul (mn, m, n); - - mpz_add (ab, ab, mn); - - unsigned long sz = mpz_sizeinbase (r,2) - 1; - mpz_tdiv_q_2exp (u, ab, sz); // this is essentially a bit shift, instead of a division - - if (mpz_cmp (u, n) >= 0) - mpz_sub (u, u, n); - - mpz_set (ret, u); - - mpz_clear (ab); - mpz_clear (mn); - mpz_clear (t); - mpz_clear (m); - mpz_clear (u); - +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni + ) { + + mpz_t t,m,u; + + mpz_init(t); + mpz_init(m); + mpz_init(u); + + + + mont_mulmod(t, b, a, r); + + mont_mulmod(m, ni, t, r); + + mpz_t ab,mn; + + mpz_init(ab); + mpz_init(mn); + + mpz_mul(ab, a, b); + mpz_mul(mn, m, n); + + mpz_add(ab, ab, mn); + + unsigned long sz = mpz_sizeinbase(r,2) - 1; + mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division + + if (mpz_cmp(u, n) >= 0) + mpz_sub(u, u, n); + + mpz_set(ret, u); + + mpz_clear(ab); + mpz_clear(mn); + mpz_clear(t); + mpz_clear(m); + mpz_clear(u); + } - // not the fastest... but it does not increase the variable sizes -void -mont_mulmod (mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) -{ - - mpz_t aa, bb; - mpz_init_set (aa, a); - mpz_init_set (bb,b); - - mpz_mod (aa, aa, mod); // in case a is bigger - - while (mpz_cmp_ui (bb, 0) > 0) - { - if (mpz_odd_p (bb)) - { - mpz_add (res, res, aa); - mpz_mod (res, res, mod); +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) { + + mpz_t aa, bb; + mpz_init_set(aa, a); + mpz_init_set(bb,b); + + mpz_mod(aa, aa, mod); // in case a is bigger + + while (mpz_cmp_ui(bb, 0) > 0) { + if (mpz_odd_p(bb)) { + mpz_add(res, res, aa); + mpz_mod(res, res, mod); + } + + mpz_mul_2exp(aa,aa,1); + mpz_mod(aa, aa, mod); + mpz_tdiv_q_2exp(bb, bb, 1); } - - mpz_mul_2exp (aa,aa,1); - mpz_mod (aa, aa, mod); - mpz_tdiv_q_2exp (bb, bb, 1); - } } -void -printmpz (mpz_t n) -{ - - for (int i = 0; i < n->_mp_size; i++) - { - - printf ((char __constant *) "%lu", n->_mp_d[i]); - - } - printf ((char __constant *) "\n\n"); - -} - - -__kernel void -montgomery (__global void *signature, __global unsigned long *s_offsets, - __global void *exponent, __global unsigned long *e_offsets, - __global void *modulus, __global unsigned long *m_offsets, - __global void *base, __global unsigned long *b_offsets, - __global unsigned long *valid, - __global unsigned long *pks, - unsigned long n) -{ - - - int index = get_global_id (0); - - int pk = 0; - - while (1) - { - if (pks[pk] >= index) - break; - pk++; - } - - mpz_t b,e,m,sig,res; - mpz_init (res); - - mpz_set_lg ((unsigned long *) b,&base[b_offsets[index]]); // this is sacrilegious really... - - mpz_set_lg ((unsigned long *) sig,&signature[s_offsets[index]]); - - mpz_set_lg ((unsigned long *) e,&exponent[e_offsets[pk]]); - mpz_set_lg ((unsigned long *) m,&modulus[m_offsets[pk]]); // n - - - mpz_t r, r_1, ni, M, x; - mpz_init (r); - mpz_init (r_1); - mpz_init (ni); - mpz_init (M); - mpz_init (x); - - - mpz_t xx; - mpz_init (xx); - - // the modulus can be assumed to be uneven – always - if (mpz_even_p (m)) - { - /* - mpz_t bb, x1, x2, q, powj; - mpz_init(bb); - mpz_init(x1); - mpz_init(x2); - mpz_init(q); - mpz_init(powj); - - mont_prepare_even_modulus(m, q, powj); - - // q is uneven, so we can use regular modexp - // MARK: we can improve the efficiency here by doing simple reductions - - mpz_mod(bb, b, q); // reductions like this - - mont_prepare(bb, e, q, r, r_1, ni, M, x); - mont_modexp(xx, x, e, M, q, ni, r, r_1); - mont_finish(x1, xx, q, ni, r, r_1); - - - // MARK: we can also reduce and really speed this up as well -> binary method? - mpz_powm(x2, b, e, powj); - - mpz_t y, q_1; - mpz_init(y); - mpz_init(q_1); - - mpz_sub(y, x2, x1); - - mpz_invert(q_1, q, powj); - - mpz_mul(y, y, q_1); - mpz_mod(y, y, powj); - - mpz_addmul(x1, q, y); - - mpz_set(res, x1); - - - */ - - printf ((__constant char *) "An even modulus is not allowed here."); - - } - else - { - - // MARK: prepare might not have to run individually on each kernel (prepare might even run on CPU) - mont_prepare (b, e, m, r, r_1, ni, M, x); - - - mont_modexp (xx, x, e, M, m, ni, r, r_1); - mont_finish (res, xx, m, ni, r, r_1); - - } - - if (mpz_cmp (sig,res) != 0) - { - - *valid += 1; - - } - - +__kernel void montgomery(__global void *signature, __global unsigned long *s_offsets, + __global void *exponent, __global unsigned long *e_offsets, + __global void *modulus, __global unsigned long *m_offsets, + __global void *base, __global unsigned long *b_offsets, + __global unsigned long *valid, + __global unsigned long *pks, + unsigned long n) +{ + + + int index = get_global_id(0); + + int pk = 0; + + while (1) { + if (pks[pk] >= index) + break; + pk++; + } + + mpz_t b,e,m,sig,res; + mpz_init(res); + + mpz_set_lg((unsigned long *)b,&base[b_offsets[index]]); // this is sacrilegious really... + + mpz_set_lg((unsigned long *)sig,&signature[s_offsets[index]]); + + mpz_set_lg((unsigned long *)e,&exponent[e_offsets[pk]]); + mpz_set_lg((unsigned long *)m,&modulus[m_offsets[pk]]); // n + + + mpz_t r, r_1, ni, M, x; + mpz_init(r); + mpz_init(r_1); + mpz_init(ni); + mpz_init(M); + mpz_init(x); + + + mpz_t xx; + mpz_init(xx); + + // the modulus can be assumed to be uneven – always + if (mpz_even_p(m)) { + /* + mpz_t bb, x1, x2, q, powj; + mpz_init(bb); + mpz_init(x1); + mpz_init(x2); + mpz_init(q); + mpz_init(powj); + + mont_prepare_even_modulus(m, q, powj); + + // q is uneven, so we can use regular modexp + // MARK: we can improve the efficiency here by doing simple reductions + + mpz_mod(bb, b, q); // reductions like this + + mont_prepare(bb, e, q, r, r_1, ni, M, x); + mont_modexp(xx, x, e, M, q, ni, r, r_1); + mont_finish(x1, xx, q, ni, r, r_1); + + + // MARK: we can also reduce and really speed this up as well -> binary method? + mpz_powm(x2, b, e, powj); + + mpz_t y, q_1; + mpz_init(y); + mpz_init(q_1); + + mpz_sub(y, x2, x1); + + mpz_invert(q_1, q, powj); + + mpz_mul(y, y, q_1); + mpz_mod(y, y, powj); + + mpz_addmul(x1, q, y); + + mpz_set(res, x1); + + + */ + + printf((__constant char *)"An even modulus is not allowed here."); + + } else { + + // MARK: prepare might not have to run individually on each kernel (prepare might even run on CPU) + mont_prepare(b, e, m, r, r_1, ni, M, x); + + + mont_modexp(xx, x, e, M, m, ni, r, r_1); + mont_finish(res, xx, m, ni, r, r_1); + + } + + if (mpz_cmp(sig,res) != 0) { + + *valid += 1; + + } + + + + } diff --git a/source/rsa-test.c b/source/rsa-test.c @@ -350,19 +350,14 @@ create_kernel (cl_program program, - - - - - - -void pairs_from_files(void *bases, unsigned long *b_len, - void *exponents, unsigned long *e_len, - void *moduli, unsigned long *m_len, - void *signatures, unsigned long *s_len, - unsigned long *pks, - unsigned long *n - ) { +void +pairs_from_files(void *bases, unsigned long *b_len, + void *exponents, unsigned long *e_len, + void *moduli, unsigned long *m_len, + void *signatures, unsigned long *s_len, + unsigned long *pks, + unsigned long *n + ) { FILE *pkfile; FILE *msfile; @@ -454,9 +449,9 @@ void pairs_from_files(void *bases, unsigned long *b_len, fclose(msfile); *n = j; - } + unsigned long number_of_pairs(void) { struct stat ss; @@ -670,7 +665,7 @@ int verify_pairs_with_opencl(void *bases, unsigned long *b_len, clReleaseContext(context); return 0; - + } int rsa_tests(void) { @@ -713,7 +708,8 @@ int rsa_tests(void) { t, x, pks, &pairs); // this returns the actual amount of pairs unsigned long result = 0; - + + pairs = 1; // FIXME: forced to 1 to not delay forever! printf("VERIFYING %lu SIGNATURES...\n", pairs); verify_pairs_with_opencl(q, u,