libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit e496a5a3755148a391d018e3174401a8f894ea9b
parent 421a712d4743971c39d81c71edd9beadb56f3776
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Sat, 25 Nov 2023 20:09:53 +0100

It does work

But I have made a mess of variable names, going to have to fix that

Diffstat:
M.DS_Store | 0
Msource/gmp.c | 17-----------------
Msource/gmp.h | 19+++++++++++++++++++
Asource/gmp_GPU.c | 2923+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/gmp_GPU.h | 27+++++++++++++++++++++++++++
Msource/rsa-test.c | 15++++++++++-----
Mxcode/.DS_Store | 0
Mxcode/lib-gpu-verify.xcodeproj/project.pbxproj | 8++++++--
Mxcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate | 0
Mxcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist | 278++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Mxcode/montgomery.cl | 196+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
11 files changed, 3280 insertions(+), 203 deletions(-)

diff --git a/.DS_Store b/.DS_Store Binary files differ. diff --git a/source/gmp.c b/source/gmp.c @@ -68,24 +68,7 @@ see https://www.gnu.org/licenses/. */ /* Macros */ -#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) -#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) -#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) - -#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) -#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) - -#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) -#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) - -#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) -#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) - -#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) -#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) - -#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) #if defined(DBL_MANT_DIG) && FLT_RADIX == 2 #define GMP_DBL_MANT_BITS DBL_MANT_DIG diff --git a/source/gmp.h b/source/gmp.h @@ -315,5 +315,24 @@ size_t mpz_out_str (FILE *, int, const mpz_t); void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *); void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t); +#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) + +#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) +#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) + +#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) +#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) + +#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) +#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) + +#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) +#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) + +#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) + #endif /* gmp_h */ diff --git a/source/gmp_GPU.c b/source/gmp_GPU.c @@ -0,0 +1,2923 @@ +// +// gmp_GPU.c +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 25.11.2023. +// + +#include "gmp_GPU.h" + +#define MINI_GMP_LIMB_TYPE long + +#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) + +#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) +#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) + +#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) +#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) + +#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) +#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) + +#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) +#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) + +#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) + +#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \ + ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) + + +#define gmp_clz(count, x) do { \ + mp_limb_t __clz_x = (x); \ + unsigned __clz_c = 0; \ + int LOCAL_SHIFT_BITS = 8; \ + if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ + for (; \ + (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ + __clz_c += 8) \ + { __clz_x <<= LOCAL_SHIFT_BITS; } \ + for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ + __clz_x <<= 1; \ + (count) = __clz_c; \ + } while (0) + +#define gmp_umullo_limb(u, v) \ + ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v)) + +#define gmp_umul_ppmm(w1, w0, u, v) \ + do { \ + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ + if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ + { \ + unsigned int __ww = (unsigned int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ + else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ + { \ + unsigned long int __ww = (unsigned long int) (u) * (v); \ + w0 = (mp_limb_t) __ww; \ + w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ + } \ + else { \ + mp_limb_t __x0, __x1, __x2, __x3; \ + unsigned __ul, __vl, __uh, __vh; \ + mp_limb_t __u = (u), __v = (v); \ + assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \ + \ + __ul = __u & GMP_LLIMB_MASK; \ + __uh = __u >> (GMP_LIMB_BITS / 2); \ + __vl = __v & GMP_LLIMB_MASK; \ + __vh = __v >> (GMP_LIMB_BITS / 2); \ + \ + __x0 = (mp_limb_t) __ul * __vl; \ + __x1 = (mp_limb_t) __ul * __vh; \ + __x2 = (mp_limb_t) __uh * __vl; \ + __x3 = (mp_limb_t) __uh * __vh; \ + \ + __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ + (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ + } \ + } while (0) + +#define gmp_assert_nocarry(x) do { \ + mp_limb_t __cy = (x); \ + assert (__cy == 0); \ + (void) (__cy); \ + } while (0) + +#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + mp_limb_t __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) + +#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + mp_limb_t __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - ((al) < (bl)); \ + (sl) = __x; \ + } while (0) + + +#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ + do { \ + mp_limb_t _qh, _ql, _r, _mask; \ + gmp_umul_ppmm (_qh, _ql, (nh), (di)); \ + gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ + _r = (nl) - gmp_umullo_limb (_qh, (d)); \ + _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ + _qh += _mask; \ + _r += _mask & (d); \ + if (_r >= (d)) \ + { \ + _r -= (d); \ + _qh++; \ + } \ + \ + (r) = _r; \ + (q) = _qh; \ + } while (0) + +#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ + do { \ + mp_limb_t _q0, _t1, _t0, _mask; \ + gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \ + gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ + \ + /* Compute the two most significant limbs of n - q'd */ \ + (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \ + gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ + gmp_umul_ppmm (_t1, _t0, (d0), (q)); \ + gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ + (q)++; \ + \ + /* Conditionally adjust q and the remainders */ \ + _mask = - (mp_limb_t) ((r1) >= _q0); \ + (q) += _mask; \ + gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ + if ((r1) >= (d1)) \ + { \ + if ((r1) > (d1) || (r0) >= (d0)) \ + { \ + (q)++; \ + gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ + } \ + } \ + } while (0) + +#define gmp_ctz(count, x) do { \ + mp_limb_t __ctz_x = (x); \ + unsigned __ctz_c = 0; \ + gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \ + (count) = GMP_LIMB_BITS - 1 - __ctz_c; \ + } while (0) + + +#define MPZ_SRCPTR_SWAP(x, y) \ + do { \ + mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mpz_srcptr_swap__tmp; \ + } while (0) + +#define MP_SIZE_T_SWAP(x, y) \ + do { \ + mp_size_t __mp_size_t_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_size_t_swap__tmp; \ + } while (0) + +#define MPZ_PTR_SWAP(x, y) \ + do { \ + mpz_ptr __mpz_ptr_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mpz_ptr_swap__tmp; \ + } while (0) + +#define MP_BITCNT_T_SWAP(x,y) \ + do { \ + mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \ + (x) = (y); \ + (y) = __mp_bitcnt_t_swap__tmp; \ + } while (0) + + +#define assert(x){if((x)==0){printf("assert reached\n");}} + +typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t; +typedef long mp_size_t; +typedef unsigned long mp_bitcnt_t; + +typedef mp_limb_t *mp_ptr; +typedef const mp_limb_t *mp_srcptr; + +typedef struct +{ + int _mp_alloc; /* Number of *limbs* allocated and pointed + to by the _mp_d field. */ + int _mp_size; /* abs(_mp_size) is the number of limbs the + last field points to. If _mp_size is + negative this is a negative number. */ + //mp_limb_t *_mp_d; /* Pointer to the limbs. */ + + mp_limb_t _mp_d[256]; + +} __mpz_struct; + +typedef __mpz_struct mpz_t[1]; + +typedef __mpz_struct *mpz_ptr; + +typedef const __mpz_struct *mpz_srcptr; + +struct gmp_div_inverse +{ + /* Normalization shift count. */ + unsigned shift; + /* Normalized divisor (d0 unused for mpn_div_qr_1) */ + mp_limb_t d1, d0; + /* Inverse, for 2/1 or 3/2. */ + mp_limb_t di; +}; + + +struct mpn_base_info +{ + /* bb is the largest power of the base which fits in one limb, and + exp is the corresponding exponent. */ + unsigned exp; + mp_limb_t bb; +}; + + +enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; + +void mpz_init (mpz_t r); +void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n); +void mpz_set (mpz_t r, const mpz_t x); +void +mpz_set (mpz_t r, const mpz_t x); +void +mpz_set_ui (mpz_t r, unsigned long int x); +void +mpz_set_si (mpz_t r, signed long int x); +void +mpz_init_set_si (mpz_t r, signed long int x); +void +mpz_init_set (mpz_t r, const mpz_t x); +void +mpz_init2 (mpz_t r, mp_bitcnt_t bits); +void +mpz_init_set_ui (mpz_t r, unsigned long int x); +void +mpz_clear (mpz_t r); +void +gmp_die (const char *msg); + + +mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n); +void +mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b); +void +mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b); +void +mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b); +int +mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un); +unsigned long int +mpz_get_ui (const mpz_t u); +int +mpz_cmpabs_ui (const mpz_t u, unsigned long v); +mp_limb_t +mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); +mp_limb_t +mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); +mp_limb_t +mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); +mp_limb_t +mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0); +int +mpz_div_qr (mpz_t q, mpz_t r, + const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode); +void +mpz_mod (mpz_t r, const mpz_t n, const mpz_t d); +void +mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d); + +void +mpn_div_qr_2_invert (struct gmp_div_inverse *inv, + mp_limb_t d1, mp_limb_t d0); + +void +mpn_div_qr_invert (struct gmp_div_inverse *inv, + mp_srcptr dp, mp_size_t dn); +int +mpz_cmp_ui (const mpz_t u, unsigned long v); +int +mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n); +mp_limb_t +mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); +mp_limb_t +mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); +int +mpz_invert (mpz_t r, const mpz_t u, const mpz_t m); +mp_limb_t +mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, + const struct gmp_div_inverse *inv); +mp_limb_t +mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); +void +mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, + const struct gmp_div_inverse *inv); +mp_limb_t +mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); +void +mpn_div_qr_pi1 (mp_ptr qp, + mp_ptr np, mp_size_t nn, mp_limb_t n1, + mp_srcptr dp, mp_size_t dn, + mp_limb_t dinv); +void +mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + const struct gmp_div_inverse *inv); +void +mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m); +int +mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); +mp_size_t +mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b); +mp_limb_t +mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); +mp_limb_t +mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); +mp_size_t +mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b); +void +mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); +mp_limb_t +mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); +mp_limb_t +mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); +mp_limb_t +mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn); +void +mpz_mul (mpz_t r, const mpz_t u, const mpz_t v); +void +mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n); +void +mpn_zero (mp_ptr rp, mp_size_t n); +void +mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits); +int +mpn_zero_p(mp_srcptr rp, mp_size_t n); +void +mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, + enum mpz_div_round_mode mode); +void +mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt); +int +mpz_cmp (const mpz_t a, const mpz_t b); +void +mpz_add (mpz_t r, const mpz_t a, const mpz_t b); +int +mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index); +mp_bitcnt_t +mpn_limb_size_in_base_2 (mp_limb_t u); +size_t +mpz_sizeinbase (const mpz_t u, int base); +int +mpz_sgn (const mpz_t u); +mp_bitcnt_t +mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, + mp_limb_t ux); +mp_bitcnt_t +mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit); +mp_bitcnt_t +mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit); +mp_bitcnt_t +mpz_make_odd (mpz_t r); +void +mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d); +void +mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index); +void +mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index); +void +mpz_setbit (mpz_t d, mp_bitcnt_t bit_index); +void +mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d); +int +mpz_cmpabs (const mpz_t u, const mpz_t v); +void +mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v); +void +mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v); + +unsigned +mpn_base_power_of_two_p (unsigned b); +void +mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b); +int isspace_gpu(unsigned char c); +int strlen_c(const char *c); +mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, + unsigned bits); +mp_size_t +mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, + mp_limb_t b, const struct mpn_base_info *info); +int +mpz_set_str (mpz_t r, const char *sp, int base); +int +mpz_init_set_str (mpz_t r, const char *sp, int base); + +//void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); +////void mpz_add (mpz_t, const mpz_t, const mpz_t); + +void mpz_abs (mpz_t, const mpz_t); + +void mpz_neg (mpz_t, const mpz_t); +void mpz_swap (mpz_t, mpz_t); +//void mpz_mod (mpz_t, const mpz_t, const mpz_t); +// +////int mpz_sgn (const mpz_t); +// +////void mpz_mul (mpz_t, const mpz_t, const mpz_t); +//void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t); +// +//void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t); +////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t); +// +void mpz_addmul (mpz_t, const mpz_t, const mpz_t); +// +//int mpz_tstbit (const mpz_t, mp_bitcnt_t); +// +//int mpz_cmp_ui (const mpz_t u, unsigned long v); +// +void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn); +// +//mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t); + +#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0) + +#define MPZ_REALLOC(z,n) (z)->_mp_d + +void +mpz_init (mpz_t r) +{ + const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; + + r->_mp_alloc = 0; + r->_mp_size = 0; + + // memset(r->_mp_d, 0, 256); + + // r->_mp_d = (mp_ptr) &dummy_limb; +} + +void +mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) +{ + mp_size_t i; + for (i = 0; i < n; i++) + d[i] = s[i]; +} + +void +mpz_set (mpz_t r, const mpz_t x) +{ + /* Allow the NOP r == x */ + if (r != x) + { + mp_size_t n; + mp_ptr rp; + + n = GMP_ABS (x->_mp_size); + rp = MPZ_REALLOC (r, n); + + mpn_copyi (rp, x->_mp_d, n); + r->_mp_size = x->_mp_size; + } +} + + +void +mpz_set_ui (mpz_t r, unsigned long int x) +{ + if (x > 0) + { + r->_mp_size = 1; + MPZ_REALLOC (r, 1)[0] = x; + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; + while (x >>= LOCAL_GMP_LIMB_BITS) + { + ++ r->_mp_size; + MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; + } + } + } + else + r->_mp_size = 0; +} + + +void +mpz_neg (mpz_t r, const mpz_t u) +{ + mpz_set (r, u); + r->_mp_size = -r->_mp_size; +} + + +void +mpz_set_si (mpz_t r, signed long int x) +{ + if (x >= 0) + mpz_set_ui (r, x); + else /* (x < 0) */ + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); + mpz_neg (r, r); + } + else + { + r->_mp_size = -1; + MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); + } +} + +void +mpz_init_set_si (mpz_t r, signed long int x) +{ + mpz_init (r); + mpz_set_si (r, x); +} + + +void +mpz_init_set (mpz_t r, const mpz_t x) +{ + mpz_init (r); + mpz_set (r, x); +} + +void +mpz_init2 (mpz_t r, mp_bitcnt_t bits) +{ + mp_size_t rn; + + bits -= (bits != 0); /* Round down, except if 0 */ + rn = 1 + bits / GMP_LIMB_BITS; + + r->_mp_alloc = rn; + r->_mp_size = 0; + // r->_mp_d = gmp_alloc_limbs (rn); +} + +void +mpz_init_set_ui (mpz_t r, unsigned long int x) +{ + mpz_init (r); + mpz_set_ui (r, x); +} + +void +mpz_clear (mpz_t r) +{ + //if (r->_mp_alloc) + //gmp_free_limbs (r->_mp_d, r->_mp_alloc); +} + + +void +gmp_die (const char *msg) +{ + //fprintf (stderr, "%s\n", msg); + //abort(); +} + +mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n) +{ + while (n > 0 && xp[n-1] == 0) + --n; + return n; +} + +void +mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) +{ + mpz_t bb; + mpz_init_set_ui (bb, b); + mpz_add (r, a, bb); + mpz_clear (bb); +} + +void +mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) +{ + mpz_neg (r, b); + mpz_add_ui (r, r, a); +} + + +void +mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) +{ + mpz_ui_sub (r, b, a); + mpz_neg (r, r); +} + +int +mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) +{ + int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS; + mp_limb_t ulongrem = 0; + + if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0) + ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1; + + return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); +} + +unsigned long int +mpz_get_ui (const mpz_t u) +{ + if (GMP_LIMB_BITS < GMP_ULONG_BITS) + { + int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; + unsigned long r = 0; + mp_size_t n = GMP_ABS (u->_mp_size); + n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); + while (--n >= 0) + r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; + return r; + } + + return u->_mp_size == 0 ? 0 : u->_mp_d[0]; +} + +int +mpz_cmpabs_ui (const mpz_t u, unsigned long v) +{ + mp_size_t un = GMP_ABS (u->_mp_size); + + if (! mpn_absfits_ulong_p (u->_mp_d, un)) + return 1; + else + { + unsigned long uu = mpz_get_ui (u); + return GMP_CMP(uu, v); + } +} + +mp_limb_t +mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) +{ + mp_size_t i; + + assert (n > 0); + + i = 0; + do + { + mp_limb_t a = ap[i]; + /* Carry out */ + mp_limb_t cy = a < b; + rp[i] = a - b; + b = cy; + } + while (++i < n); + + return b; +} + +mp_limb_t +mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) +{ + mp_size_t i; + mp_limb_t cy; + + for (i = 0, cy = 0; i < n; i++) + { + mp_limb_t a, b; + a = ap[i]; b = bp[i]; + b += cy; + cy = (b < cy); + cy += (a < b); + rp[i] = a - b; + } + return cy; +} + +mp_limb_t +mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) +{ + mp_limb_t cy; + + assert (an >= bn); + + cy = mpn_sub_n (rp, ap, bp, bn); + if (an > bn) + cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy); + return cy; +} + + +mp_limb_t +mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) +{ + mp_limb_t r, m; + + { + mp_limb_t p, ql; + unsigned ul, uh, qh; + + assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); + /* For notation, let b denote the half-limb base, so that B = b^2. + Split u1 = b uh + ul. */ + ul = u1 & GMP_LLIMB_MASK; + uh = u1 >> (GMP_LIMB_BITS / 2); + + /* Approximation of the high half of quotient. Differs from the 2/1 + inverse of the half limb uh, since we have already subtracted + u0. */ + qh = (u1 ^ GMP_LIMB_MAX) / uh; + + /* Adjust to get a half-limb 3/2 inverse, i.e., we want + + qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u + = floor( (b (~u) + b-1) / u), + + and the remainder + + r = b (~u) + b-1 - qh (b uh + ul) + = b (~u - qh uh) + b-1 - qh ul + + Subtraction of qh ul may underflow, which implies adjustments. + But by normalization, 2 u >= B > qh ul, so we need to adjust by + at most 2. + */ + + r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; + + p = (mp_limb_t) qh * ul; + /* Adjustment steps taken from udiv_qrnnd_c */ + if (r < p) + { + qh--; + r += u1; + if (r >= u1) /* i.e. we didn't get carry when adding to r */ + if (r < p) + { + qh--; + r += u1; + } + } + r -= p; + + /* Low half of the quotient is + + ql = floor ( (b r + b-1) / u1). + + This is a 3/2 division (on half-limbs), for which qh is a + suitable inverse. */ + + p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; + /* Unlike full-limb 3/2, we can add 1 without overflow. For this to + work, it is essential that ql is a full mp_limb_t. */ + ql = (p >> (GMP_LIMB_BITS / 2)) + 1; + + /* By the 3/2 trick, we don't need the high half limb. */ + r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; + + if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) + { + ql--; + r += u1; + } + m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; + if (r >= u1) + { + m++; + r -= u1; + } + } + + /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a + 3/2 inverse. */ + if (u0 > 0) + { + mp_limb_t th, tl; + r = ~r; + r += u0; + if (r < u0) + { + m--; + if (r >= u1) + { + m--; + r -= u1; + } + r -= u1; + } + gmp_umul_ppmm (th, tl, u0, m); + r += th; + if (r < th) + { + m--; + m -= ((r > u1) | ((r == u1) & (tl > u0))); + } + } + + return m; +} + +int +mpz_div_qr (mpz_t q, mpz_t r, + const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) +{ + mp_size_t ns, ds, nn, dn, qs; + ns = n->_mp_size; + ds = d->_mp_size; + + if (ds == 0) {} + //gmp_die("mpz_div_qr: Divide by zero."); + + if (ns == 0) + { + if (q) + q->_mp_size = 0; + if (r) + r->_mp_size = 0; + return 0; + } + + nn = GMP_ABS (ns); + dn = GMP_ABS (ds); + + qs = ds ^ ns; + + if (nn < dn) + { + if (mode == GMP_DIV_CEIL && qs >= 0) + { + /* q = 1, r = n - d */ + if (r) + mpz_sub (r, n, d); + if (q) + mpz_set_ui (q, 1); + } + else if (mode == GMP_DIV_FLOOR && qs < 0) + { + /* q = -1, r = n + d */ + if (r) + mpz_add (r, n, d); + if (q) + mpz_set_si (q, -1); + } + else + { + /* q = 0, r = d */ + if (r) + mpz_set (r, n); + if (q) + q->_mp_size = 0; + } + return 1; + } + else + { + mp_ptr np, qp; + mp_size_t qn, rn; + mpz_t tq, tr; + + mpz_init_set (tr, n); + np = tr->_mp_d; + + qn = nn - dn + 1; + + if (q) + { + mpz_init2 (tq, qn * GMP_LIMB_BITS); + qp = tq->_mp_d; + } + else + qp = NULL; + + mpn_div_qr (qp, np, nn, d->_mp_d, dn); + + if (qp) + { + qn -= (qp[qn-1] == 0); + + tq->_mp_size = qs < 0 ? -qn : qn; + } + rn = mpn_normalized_size (np, dn); + tr->_mp_size = ns < 0 ? - rn : rn; + + if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) + { + if (q) + mpz_sub_ui (tq, tq, 1); + if (r) + mpz_add (tr, tr, d); + } + else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) + { + if (q) + mpz_add_ui (tq, tq, 1); + if (r) + mpz_sub (tr, tr, d); + } + + if (q) + { + mpz_swap (tq, q); + mpz_clear (tq); + } + if (r) + mpz_swap (tr, r); + + mpz_clear (tr); + + return rn != 0; + } +} + +void +mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) +{ + struct gmp_div_inverse inv; + // mp_ptr tp = NULL; + + mpz_t tp; + + assert (dn > 0); + assert (nn >= dn); + + mpn_div_qr_invert (&inv, dp, dn); + if (dn > 2 && inv.shift > 0) + { + //tp = gmp_alloc_limbs (dn); + gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift)); + dp = tp->_mp_d; + } + mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv); + if (tp) {} + //gmp_free_limbs (tp, dn); +} + +void +mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) +{ + mpz_t t; + mpz_init (t); + mpz_mul (t, u, v); + mpz_add (r, r, t); + mpz_clear (t); +} + +void +mpz_swap (mpz_t u, mpz_t v) +{ + //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); + //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); + + mpz_t temp; + mpz_init(temp); + + *temp = *u; + *u = *v; + *v = *temp; + +} + +void +mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) +{ + mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL); +} + +void +mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) +{ + unsigned shift; + + assert (d > 0); + gmp_clz (shift, d); + inv->shift = shift; + inv->d1 = d << shift; + inv->di = mpn_invert_limb (inv->d1); +} + +void +mpn_div_qr_2_invert (struct gmp_div_inverse *inv, + mp_limb_t d1, mp_limb_t d0) +{ + unsigned shift; + + assert (d1 > 0); + gmp_clz (shift, d1); + inv->shift = shift; + if (shift > 0) + { + d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); + d0 <<= shift; + } + inv->d1 = d1; + inv->d0 = d0; + inv->di = mpn_invert_3by2 (d1, d0); +} + +void +mpn_div_qr_invert (struct gmp_div_inverse *inv, + mp_srcptr dp, mp_size_t dn) +{ + assert (dn > 0); + + if (dn == 1) + mpn_div_qr_1_invert (inv, dp[0]); + else if (dn == 2) + mpn_div_qr_2_invert (inv, dp[1], dp[0]); + else + { + unsigned shift; + mp_limb_t d1, d0; + + d1 = dp[dn-1]; + d0 = dp[dn-2]; + assert (d1 > 0); + gmp_clz (shift, d1); + inv->shift = shift; + if (shift > 0) + { + d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); + d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift)); + } + inv->d1 = d1; + inv->d0 = d0; + inv->di = mpn_invert_3by2 (d1, d0); + } +} + + +int +mpz_cmp_ui (const mpz_t u, unsigned long v) +{ + mp_size_t usize = u->_mp_size; + + if (usize < 0) + return -1; + else + return mpz_cmpabs_ui (u, v); +} + +int +mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) +{ + while (--n >= 0) + { + if (ap[n] != bp[n]) + return ap[n] > bp[n] ? 1 : -1; + } + return 0; +} + +mp_limb_t +mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) +{ + mp_limb_t high_limb, low_limb; + unsigned int tnc; + mp_limb_t retval; + + assert (n >= 1); + assert (cnt >= 1); + assert (cnt < GMP_LIMB_BITS); + + up += n; + rp += n; + + tnc = GMP_LIMB_BITS - cnt; + low_limb = *--up; + retval = low_limb >> tnc; + high_limb = (low_limb << cnt); + + while (--n != 0) + { + low_limb = *--up; + *--rp = high_limb | (low_limb >> tnc); + high_limb = (low_limb << cnt); + } + *--rp = high_limb; + + return retval; +} + +mp_limb_t +mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) +{ + mp_limb_t high_limb, low_limb; + unsigned int tnc; + mp_limb_t retval; + + assert (n >= 1); + assert (cnt >= 1); + assert (cnt < GMP_LIMB_BITS); + + tnc = GMP_LIMB_BITS - cnt; + high_limb = *up++; + retval = (high_limb << tnc); + low_limb = high_limb >> cnt; + + while (--n != 0) + { + high_limb = *up++; + *rp++ = low_limb | (high_limb << tnc); + low_limb = high_limb >> cnt; + } + *rp = low_limb; + + return retval; +} + +int +mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) +{ + mpz_t g, tr; + int invertible; + + if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0) + return 0; + + mpz_init (g); + mpz_init (tr); + + mpz_gcdext (g, tr, NULL, u, m); + invertible = (mpz_cmp_ui (g, 1) == 0); + + if (invertible) + { + if (tr->_mp_size < 0) + { + if (m->_mp_size >= 0) + mpz_add (tr, tr, m); + else + mpz_sub (tr, tr, m); + } + mpz_swap (r, tr); + } + + mpz_clear (g); + mpz_clear (tr); + return invertible; +} + +/* Not matching current public gmp interface, rather corresponding to + the sbpi1_div_* functions. */ +mp_limb_t +mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, + const struct gmp_div_inverse *inv) +{ + mp_limb_t d, di; + mp_limb_t r; + mp_ptr tp = NULL; + mp_size_t tn = 0; + + if (inv->shift > 0) + { + /* Shift, reusing qp area if possible. In-place shift if qp == np. */ + tp = qp; + if (!tp) + { + tn = nn; + + // tp = gmp_alloc_limbs (tn); + } + r = mpn_lshift (tp, np, nn, inv->shift); + np = tp; + } + else + r = 0; + + d = inv->d1; + di = inv->di; + while (--nn >= 0) + { + mp_limb_t q; + + gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); + if (qp) + qp[nn] = q; + } + //if (tn) + //gmp_free_limbs (tp, tn); + + return r >> inv->shift; +} + +mp_limb_t +mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) +{ + mp_size_t i; + mp_limb_t cy; + + for (i = 0, cy = 0; i < n; i++) + { + mp_limb_t a, b, r; + a = ap[i]; b = bp[i]; + r = a + cy; + cy = (r < cy); + r += b; + cy += (r < b); + rp[i] = r; + } + return cy; +} + +void +mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, + const struct gmp_div_inverse *inv) +{ + unsigned shift; + mp_size_t i; + mp_limb_t d1, d0, di, r1, r0; + + assert (nn >= 2); + shift = inv->shift; + d1 = inv->d1; + d0 = inv->d0; + di = inv->di; + + if (shift > 0) + r1 = mpn_lshift (np, np, nn, shift); + else + r1 = 0; + + r0 = np[nn - 1]; + + i = nn - 2; + do + { + mp_limb_t n0, q; + n0 = np[i]; + gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); + + if (qp) + qp[i] = q; + } + while (--i >= 0); + + if (shift > 0) + { + assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); + r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); + r1 >>= shift; + } + + np[1] = r1; + np[0] = r0; +} + +mp_limb_t +mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) +{ + mp_limb_t ul, cl, hpl, lpl, rl; + + assert (n >= 1); + + cl = 0; + do + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); + + lpl += cl; + cl = (lpl < cl) + hpl; + + rl = *rp; + lpl = rl - lpl; + cl += lpl > rl; + *rp++ = lpl; + } + while (--n != 0); + + return cl; +} + +void +mpn_div_qr_pi1 (mp_ptr qp, + mp_ptr np, mp_size_t nn, mp_limb_t n1, + mp_srcptr dp, mp_size_t dn, + mp_limb_t dinv) +{ + mp_size_t i; + + mp_limb_t d1, d0; + mp_limb_t cy, cy1; + mp_limb_t q; + + assert (dn > 2); + assert (nn >= dn); + + d1 = dp[dn - 1]; + d0 = dp[dn - 2]; + + assert ((d1 & GMP_LIMB_HIGHBIT) != 0); + /* Iteration variable is the index of the q limb. + * + * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]> + * by <d1, d0, dp[dn-3], ..., dp[0] > + */ + + i = nn - dn; + do + { + mp_limb_t n0 = np[dn-1+i]; + + if (n1 == d1 && n0 == d0) + { + q = GMP_LIMB_MAX; + mpn_submul_1 (np+i, dp, dn, q); + n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */ + } + else + { + gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv); + + cy = mpn_submul_1 (np + i, dp, dn-2, q); + + cy1 = n0 < cy; + n0 = n0 - cy; + cy = n1 < cy1; + n1 = n1 - cy1; + np[dn-2+i] = n0; + + if (cy != 0) + { + n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); + q--; + } + } + + if (qp) + qp[i] = q; + } + while (--i >= 0); + + np[dn - 1] = n1; +} + +void +mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, + mp_srcptr dp, mp_size_t dn, + const struct gmp_div_inverse *inv) +{ + assert (dn > 0); + assert (nn >= dn); + + if (dn == 1) + np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv); + else if (dn == 2) + mpn_div_qr_2_preinv (qp, np, nn, inv); + else + { + mp_limb_t nh; + unsigned shift; + + assert (inv->d1 == dp[dn-1]); + assert (inv->d0 == dp[dn-2]); + assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); + + shift = inv->shift; + if (shift > 0) + nh = mpn_lshift (np, np, nn, shift); + else + nh = 0; + + mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); + + if (shift > 0) + gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); + } +} + +void +mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) +{ + mpz_t tr; + mpz_t base; + mp_size_t en, mn; + mp_srcptr mp; + struct gmp_div_inverse minv; + unsigned shift; + //mp_ptr tp = NULL; + mpz_t tp; + + //mpz_init(tp); + + en = GMP_ABS (e->_mp_size); + mn = GMP_ABS (m->_mp_size); + if (mn == 0) {} + //gmp_die ("mpz_powm: Zero modulo."); + + if (en == 0) + { + mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); + return; + } + + mp = m->_mp_d; + mpn_div_qr_invert (&minv, mp, mn); + shift = minv.shift; + + if (shift > 0) + { + /* To avoid shifts, we do all our reductions, except the final + one, using a *normalized* m. */ + minv.shift = 0; + + // tp = gmp_alloc_limbs (mn); + gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift)); + mp = tp->_mp_d; + } + + mpz_init (base); + + if (e->_mp_size < 0) + { + if (!mpz_invert (base, b, m)) {} + //gmp_die ("mpz_powm: Negative exponent and non-invertible base."); + } + else + { + mp_size_t bn; + mpz_abs (base, b); + + bn = base->_mp_size; + if (bn >= mn) + { + mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv); + bn = mn; + } + + /* We have reduced the absolute value. Now take care of the + sign. Note that we get zero represented non-canonically as + m. */ + if (b->_mp_size < 0) + { + mp_ptr bp = MPZ_REALLOC (base, mn); + gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn)); + bn = mn; + } + base->_mp_size = mpn_normalized_size (base->_mp_d, bn); + } + mpz_init_set_ui (tr, 1); + + while (--en >= 0) + { + mp_limb_t w = e->_mp_d[en]; + mp_limb_t bit; + + bit = GMP_LIMB_HIGHBIT; + do + { + mpz_mul (tr, tr, tr); + if (w & bit) + mpz_mul (tr, tr, base); + if (tr->_mp_size > mn) + { + mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); + tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); + } + bit >>= 1; + } + while (bit > 0); + } + + /* Final reduction */ + if (tr->_mp_size >= mn) + { + minv.shift = shift; + mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); + tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); + } + //if (tp) + //gmp_free_limbs (tp, mn); + + mpz_swap (r, tr); + mpz_clear (tr); + mpz_clear (base); +} + +int +mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) +{ + if (an != bn) + return an < bn ? -1 : 1; + else + return mpn_cmp (ap, bp, an); +} + + +mp_size_t +mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b) +{ + mp_size_t an = GMP_ABS (a->_mp_size); + mp_size_t bn = GMP_ABS (b->_mp_size); + int cmp; + mp_ptr rp; + + cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn); + if (cmp > 0) + { + rp = MPZ_REALLOC (r, an); + gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); + return mpn_normalized_size (rp, an); + } + else if (cmp < 0) + { + rp = MPZ_REALLOC (r, bn); + gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); + return -mpn_normalized_size (rp, bn); + } + else + return 0; +} + +mp_limb_t +mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) +{ + mp_size_t i; + + assert (n > 0); + i = 0; + do + { + mp_limb_t r = ap[i] + b; + /* Carry out */ + b = (r < b); + rp[i] = r; + } + while (++i < n); + + return b; +} + + +mp_limb_t +mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) +{ + mp_limb_t cy; + + assert (an >= bn); + + cy = mpn_add_n (rp, ap, bp, bn); + if (an > bn) + cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy); + return cy; +} + +mp_size_t +mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) +{ + mp_size_t an = GMP_ABS (a->_mp_size); + mp_size_t bn = GMP_ABS (b->_mp_size); + mp_ptr rp; + mp_limb_t cy; + + if (an < bn) + { + MPZ_SRCPTR_SWAP (a, b); + MP_SIZE_T_SWAP (an, bn); + } + + rp = MPZ_REALLOC (r, an + 1); + cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); + + rp[an] = cy; + + return an + cy; +} + +void +mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) +{ + mp_size_t rn; + + if ( (a->_mp_size ^ b->_mp_size) >= 0) + rn = mpz_abs_sub (r, a, b); + else + rn = mpz_abs_add (r, a, b); + + r->_mp_size = a->_mp_size >= 0 ? rn : - rn; +} + +mp_limb_t +mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) +{ + mp_limb_t ul, cl, hpl, lpl, rl; + + assert (n >= 1); + + cl = 0; + do + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); + + lpl += cl; + cl = (lpl < cl) + hpl; + + rl = *rp; + lpl = rl + lpl; + cl += lpl < rl; + *rp++ = lpl; + } + while (--n != 0); + + return cl; +} + +mp_limb_t +mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) +{ + mp_limb_t ul, cl, hpl, lpl; + + assert (n >= 1); + + cl = 0; + do + { + ul = *up++; + gmp_umul_ppmm (hpl, lpl, ul, vl); + + lpl += cl; + cl = (lpl < cl) + hpl; + + *rp++ = lpl; + } + while (--n != 0); + + return cl; +} + + +mp_limb_t +mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) +{ + assert (un >= vn); + assert (vn >= 1); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); + assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); + + /* We first multiply by the low order limb. This result can be + stored, not added, to rp. We also avoid a loop for zeroing this + way. */ + + rp[un] = mpn_mul_1 (rp, up, un, vp[0]); + + /* Now accumulate the product of up[] and the next higher limb from + vp[]. */ + + while (--vn >= 1) + { + rp += 1, vp += 1; + rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); + } + return rp[un]; +} + + +void +mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) +{ + int sign; + mp_size_t un, vn, rn; + mpz_t t; + mp_ptr tp; + + un = u->_mp_size; + vn = v->_mp_size; + + if (un == 0 || vn == 0) + { + r->_mp_size = 0; + return; + } + + sign = (un ^ vn) < 0; + + un = GMP_ABS (un); + vn = GMP_ABS (vn); + + mpz_init2 (t, (un + vn) * GMP_LIMB_BITS); + + tp = t->_mp_d; + if (un >= vn) + mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn); + else + mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un); + + rn = un + vn; + rn -= tp[rn-1] == 0; + + t->_mp_size = sign ? - rn : rn; + mpz_swap (r, t); + mpz_clear (t); +} + +void +mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) +{ + while (--n >= 0) + d[n] = s[n]; +} + +void +mpn_zero (mp_ptr rp, mp_size_t n) +{ + while (--n >= 0) + rp[n] = 0; +} + + +void +mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) +{ + mp_size_t un, rn; + mp_size_t limbs; + unsigned shift; + mp_ptr rp; + + un = GMP_ABS (u->_mp_size); + if (un == 0) + { + r->_mp_size = 0; + return; + } + + limbs = bits / GMP_LIMB_BITS; + shift = bits % GMP_LIMB_BITS; + + rn = un + limbs + (shift > 0); + rp = MPZ_REALLOC (r, rn); + if (shift > 0) + { + mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); + rp[rn-1] = cy; + rn -= (cy == 0); + } + else + mpn_copyd (rp + limbs, u->_mp_d, un); + + mpn_zero (rp, limbs); + + r->_mp_size = (u->_mp_size < 0) ? - rn : rn; +} + +int +mpn_zero_p(mp_srcptr rp, mp_size_t n) +{ + return mpn_normalized_size (rp, n) == 0; +} + + +void +mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, + enum mpz_div_round_mode mode) +{ + mp_size_t un, qn; + mp_size_t limb_cnt; + mp_ptr qp; + int adjust; + + un = u->_mp_size; + if (un == 0) + { + q->_mp_size = 0; + return; + } + limb_cnt = bit_index / GMP_LIMB_BITS; + qn = GMP_ABS (un) - limb_cnt; + bit_index %= GMP_LIMB_BITS; + + if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */ + /* Note: Below, the final indexing at limb_cnt is valid because at + that point we have qn > 0. */ + adjust = (qn <= 0 + || !mpn_zero_p (u->_mp_d, limb_cnt) + || (u->_mp_d[limb_cnt] + & (((mp_limb_t) 1 << bit_index) - 1))); + else + adjust = 0; + + if (qn <= 0) + qn = 0; + else + { + qp = MPZ_REALLOC (q, qn); + + if (bit_index != 0) + { + mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index); + qn -= qp[qn - 1] == 0; + } + else + { + mpn_copyi (qp, u->_mp_d + limb_cnt, qn); + } + } + + q->_mp_size = qn; + + if (adjust) + mpz_add_ui (q, q, 1); + if (un < 0) + mpz_neg (q, q); +} + +void +mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) +{ + mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC); +} + +int +mpz_cmp (const mpz_t a, const mpz_t b) +{ + mp_size_t asize = a->_mp_size; + mp_size_t bsize = b->_mp_size; + + if (asize != bsize) + return (asize < bsize) ? -1 : 1; + else if (asize >= 0) + return mpn_cmp (a->_mp_d, b->_mp_d, asize); + else + return mpn_cmp (b->_mp_d, a->_mp_d, -asize); +} + +void +mpz_add (mpz_t r, const mpz_t a, const mpz_t b) +{ + mp_size_t rn; + + if ( (a->_mp_size ^ b->_mp_size) >= 0) + rn = mpz_abs_add (r, a, b); + else + rn = mpz_abs_sub (r, a, b); + + r->_mp_size = a->_mp_size >= 0 ? rn : - rn; +} + + +int +mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) +{ + mp_size_t limb_index; + unsigned shift; + mp_size_t ds; + mp_size_t dn; + mp_limb_t w; + int bit; + + ds = d->_mp_size; + dn = GMP_ABS (ds); + limb_index = bit_index / GMP_LIMB_BITS; + if (limb_index >= dn) + return ds < 0; + + shift = bit_index % GMP_LIMB_BITS; + w = d->_mp_d[limb_index]; + bit = (w >> shift) & 1; + + if (ds < 0) + { + /* d < 0. Check if any of the bits below is set: If so, our bit + must be complemented. */ + if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) + return bit ^ 1; + while (--limb_index >= 0) + if (d->_mp_d[limb_index] > 0) + return bit ^ 1; + } + return bit; +} + +mp_bitcnt_t +mpn_limb_size_in_base_2 (mp_limb_t u) +{ + unsigned shift; + + assert (u > 0); + gmp_clz (shift, u); + return GMP_LIMB_BITS - shift; +} + +size_t +mpz_sizeinbase (const mpz_t u, int base) +{ + mp_size_t un, tn; + mp_srcptr up; + //mp_ptr tp; + mpz_t tp; + + mp_bitcnt_t bits; + struct gmp_div_inverse bi; + size_t ndigits; + + mpz_init(tp); + + assert (base >= 2); + assert (base <= 62); + + un = GMP_ABS (u->_mp_size); + if (un == 0) + return 1; + + up = u->_mp_d; + + bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]); + switch (base) + { + case 2: + return bits; + case 4: + return (bits + 1) / 2; + case 8: + return (bits + 2) / 3; + case 16: + return (bits + 3) / 4; + case 32: + return (bits + 4) / 5; + /* FIXME: Do something more clever for the common case of base + 10. */ + } + + //tp = gmp_alloc_limbs (un); + + mpn_copyi (tp->_mp_d, up, un); + mpn_div_qr_1_invert (&bi, base); + + tn = un; + ndigits = 0; + do + { + ndigits++; + mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi); + tn -= (tp->_mp_d[tn-1] == 0); + } + while (tn > 0); + + //gmp_free_limbs (tp, un); + return ndigits; +} + +int +mpz_sgn (const mpz_t u) +{ + return GMP_CMP (u->_mp_size, 0); +} + +mp_bitcnt_t +mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, + mp_limb_t ux) +{ + unsigned cnt; + + assert (ux == 0 || ux == GMP_LIMB_MAX); + assert (0 <= i && i <= un ); + + while (limb == 0) + { + i++; + if (i == un) + return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); + limb = ux ^ up[i]; + } + gmp_ctz (cnt, limb); + return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; +} + +void +mpz_abs (mpz_t r, const mpz_t u) +{ + mpz_set (r, u); + r->_mp_size = GMP_ABS (r->_mp_size); +} + + +mp_bitcnt_t +mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) +{ + mp_size_t i; + i = bit / GMP_LIMB_BITS; + + return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), + i, ptr, i, 0); +} + +mp_bitcnt_t +mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit) +{ + mp_ptr up; + mp_size_t us, un, i; + mp_limb_t limb, ux; + + us = u->_mp_size; + un = GMP_ABS (us); + i = starting_bit / GMP_LIMB_BITS; + + /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit + for u<0. Notice this test picks up any u==0 too. */ + if (i >= un) + return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit); + + up = u->_mp_d; + ux = 0; + limb = up[i]; + + if (starting_bit != 0) + { + if (us < 0) + { + ux = mpn_zero_p (up, i); + limb = ~ limb + ux; + ux = - (mp_limb_t) (limb >= ux); + } + + /* Mask to 0 all bits before starting_bit, thus ignoring them. */ + limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); + } + + return mpn_common_scan (limb, i, up, un, ux); +} + + +mp_bitcnt_t +mpz_make_odd (mpz_t r) +{ + mp_bitcnt_t shift; + + assert (r->_mp_size > 0); + /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ + shift = mpn_scan1 (r->_mp_d, 0); + mpz_tdiv_q_2exp (r, r, shift); + + return shift; +} + +void +mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) +{ + mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC); +} + +void +mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) +{ + mp_size_t dn, limb_index; + mp_limb_t bit; + mp_ptr dp; + + dn = GMP_ABS (d->_mp_size); + + limb_index = bit_index / GMP_LIMB_BITS; + bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); + + if (limb_index >= dn) + { + mp_size_t i; + /* The bit should be set outside of the end of the number. + We have to increase the size of the number. */ + dp = MPZ_REALLOC (d, limb_index + 1); + + dp[limb_index] = bit; + for (i = dn; i < limb_index; i++) + dp[i] = 0; + dn = limb_index + 1; + } + else + { + mp_limb_t cy; + + dp = d->_mp_d; + + cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); + if (cy > 0) + { + dp = MPZ_REALLOC (d, dn + 1); + dp[dn++] = cy; + } + } + + d->_mp_size = (d->_mp_size < 0) ? - dn : dn; +} + +void +mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) +{ + mp_size_t dn, limb_index; + mp_ptr dp; + mp_limb_t bit; + + dn = GMP_ABS (d->_mp_size); + dp = d->_mp_d; + + limb_index = bit_index / GMP_LIMB_BITS; + bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); + + assert (limb_index < dn); + + gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, + dn - limb_index, bit)); + dn = mpn_normalized_size (dp, dn); + d->_mp_size = (d->_mp_size < 0) ? - dn : dn; +} + +void +mpz_setbit (mpz_t d, mp_bitcnt_t bit_index) +{ + if (!mpz_tstbit (d, bit_index)) + { + if (d->_mp_size >= 0) + mpz_abs_add_bit (d, bit_index); + else + mpz_abs_sub_bit (d, bit_index); + } +} + +void +mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d) +{ + gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC)); +} + +#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0]) +#define mpz_even_p(z) (! mpz_odd_p (z)) + +int +mpz_cmpabs (const mpz_t u, const mpz_t v) +{ + return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size), + v->_mp_d, GMP_ABS (v->_mp_size)); +} + +void +mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) +{ + mpz_t tu, tv, s0, s1, t0, t1; + mp_bitcnt_t uz, vz, gz; + mp_bitcnt_t power; + + if (u->_mp_size == 0) + { + /* g = 0 u + sgn(v) v */ + signed long sign = mpz_sgn (v); + mpz_abs (g, v); + if (s) + s->_mp_size = 0; + if (t) + mpz_set_si (t, sign); + return; + } + + if (v->_mp_size == 0) + { + /* g = sgn(u) u + 0 v */ + signed long sign = mpz_sgn (u); + mpz_abs (g, u); + if (s) + mpz_set_si (s, sign); + if (t) + t->_mp_size = 0; + return; + } + + mpz_init (tu); + mpz_init (tv); + mpz_init (s0); + mpz_init (s1); + mpz_init (t0); + mpz_init (t1); + + mpz_abs (tu, u); + uz = mpz_make_odd (tu); + mpz_abs (tv, v); + vz = mpz_make_odd (tv); + gz = GMP_MIN (uz, vz); + + uz -= gz; + vz -= gz; + + /* Cofactors corresponding to odd gcd. gz handled later. */ + if (tu->_mp_size < tv->_mp_size) + { + mpz_swap (tu, tv); + MPZ_SRCPTR_SWAP (u, v); + MPZ_PTR_SWAP (s, t); + MP_BITCNT_T_SWAP (uz, vz); + } + + /* Maintain + * + * u = t0 tu + t1 tv + * v = s0 tu + s1 tv + * + * where u and v denote the inputs with common factors of two + * eliminated, and det (s0, t0; s1, t1) = 2^p. Then + * + * 2^p tu = s1 u - t1 v + * 2^p tv = -s0 u + t0 v + */ + + /* After initial division, tu = q tv + tu', we have + * + * u = 2^uz (tu' + q tv) + * v = 2^vz tv + * + * or + * + * t0 = 2^uz, t1 = 2^uz q + * s0 = 0, s1 = 2^vz + */ + + mpz_tdiv_qr (t1, tu, tu, tv); + mpz_mul_2exp (t1, t1, uz); + + mpz_setbit (s1, vz); + power = uz + vz; + + if (tu->_mp_size > 0) + { + mp_bitcnt_t shift; + shift = mpz_make_odd (tu); + mpz_setbit (t0, uz + shift); + power += shift; + + for (;;) + { + int c; + c = mpz_cmp (tu, tv); + if (c == 0) + break; + + if (c < 0) + { + /* tv = tv' + tu + * + * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' + * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ + + mpz_sub (tv, tv, tu); + mpz_add (t0, t0, t1); + mpz_add (s0, s0, s1); + + shift = mpz_make_odd (tv); + mpz_mul_2exp (t1, t1, shift); + mpz_mul_2exp (s1, s1, shift); + } + else + { + mpz_sub (tu, tu, tv); + mpz_add (t1, t0, t1); + mpz_add (s1, s0, s1); + + shift = mpz_make_odd (tu); + mpz_mul_2exp (t0, t0, shift); + mpz_mul_2exp (s0, s0, shift); + } + power += shift; + } + } + else + mpz_setbit (t0, uz); + + /* Now tv = odd part of gcd, and -s0 and t0 are corresponding + cofactors. */ + + mpz_mul_2exp (tv, tv, gz); + mpz_neg (s0, s0); + + /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To + adjust cofactors, we need u / g and v / g */ + + mpz_divexact (s1, v, tv); + mpz_abs (s1, s1); + mpz_divexact (t1, u, tv); + mpz_abs (t1, t1); + + while (power-- > 0) + { + /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ + if (mpz_odd_p (s0) || mpz_odd_p (t0)) + { + mpz_sub (s0, s0, s1); + mpz_add (t0, t0, t1); + } + //assert (mpz_even_p (t0) && mpz_even_p (s0)); + mpz_tdiv_q_2exp (s0, s0, 1); + mpz_tdiv_q_2exp (t0, t0, 1); + } + + /* Arrange so that |s| < |u| / 2g */ + mpz_add (s1, s0, s1); + if (mpz_cmpabs (s0, s1) > 0) + { + mpz_swap (s0, s1); + mpz_sub (t0, t0, t1); + } + if (u->_mp_size < 0) + mpz_neg (s0, s0); + if (v->_mp_size < 0) + mpz_neg (t0, t0); + + mpz_swap (g, tv); + if (s) + mpz_swap (s, s0); + if (t) + mpz_swap (t, t0); + + mpz_clear (tu); + mpz_clear (tv); + mpz_clear (s0); + mpz_clear (s1); + mpz_clear (t0); + mpz_clear (t1); +} + + +void +mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) +{ + mpz_t t; + mpz_init_set_ui (t, v); + mpz_mul (t, u, t); + mpz_add (r, r, t); + mpz_clear (t); +} + + +// STRING CONVERSION + +unsigned +mpn_base_power_of_two_p (unsigned b) +{ + switch (b) + { + case 2: return 1; + case 4: return 2; + case 8: return 3; + case 16: return 4; + case 32: return 5; + case 64: return 6; + case 128: return 7; + case 256: return 8; + default: return 0; + } +} + + + +void +mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) +{ + mp_limb_t m; + mp_limb_t p; + unsigned exp; + + m = GMP_LIMB_MAX / b; + for (exp = 1, p = b; p <= m; exp++) + p *= b; + + info->exp = exp; + info->bb = p; +} + +int isspace_gpu(unsigned char c) { + if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v') + return 1; + return 0; +} + +int strlen_c(const char *c) { + + // rather naive implementation – we assume a string is terminated, and is not 0 characters long. + + int i = 0; + while (1) { + if (c[i] == '\0') + return i; + i++; + } + return i; +} + +mp_size_t +mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, + unsigned bits) +{ + mp_size_t rn; + mp_limb_t limb; + unsigned shift; + + for (limb = 0, rn = 0, shift = 0; sn-- > 0; ) + { + limb |= (mp_limb_t) sp[sn] << shift; + shift += bits; + if (shift >= GMP_LIMB_BITS) + { + shift -= GMP_LIMB_BITS; + rp[rn++] = limb; + /* Next line is correct also if shift == 0, + bits == 8, and mp_limb_t == unsigned char. */ + limb = (unsigned int) sp[sn] >> (bits - shift); + } + } + if (limb != 0) + rp[rn++] = limb; + else + rn = mpn_normalized_size (rp, rn); + return rn; +} + +mp_size_t +mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, + mp_limb_t b, const struct mpn_base_info *info) +{ + mp_size_t rn; + mp_limb_t w; + unsigned k; + size_t j; + + assert (sn > 0); + + k = 1 + (sn - 1) % info->exp; + + j = 0; + w = sp[j++]; + while (--k != 0) + w = w * b + sp[j++]; + + rp[0] = w; + + for (rn = 1; j < sn;) + { + mp_limb_t cy; + + w = sp[j++]; + for (k = 1; k < info->exp; k++) + w = w * b + sp[j++]; + + cy = mpn_mul_1 (rp, rp, rn, info->bb); + cy += mpn_add_1 (rp, rp, rn, w); + if (cy > 0) + rp[rn++] = cy; + } + assert (j == sn); + + return rn; +} + + +int +mpz_set_str (mpz_t r, const char *sp, int base) +{ + unsigned bits, value_of_a; + mp_size_t rn, alloc; + mp_ptr rp; + size_t dn, sn; + int sign; + unsigned char dp[4096]; + + assert (base == 0 || (base >= 2 && base <= 62)); + + while (isspace_gpu( (unsigned char) *sp)) + sp++; + + sign = (*sp == '-'); + sp += sign; + + if (base == 0) + { + if (sp[0] == '0') + { + if (sp[1] == 'x' || sp[1] == 'X') + { + base = 16; + sp += 2; + } + else if (sp[1] == 'b' || sp[1] == 'B') + { + base = 2; + sp += 2; + } + else + base = 8; + } + else + base = 10; + } + + if (!*sp) + { + r->_mp_size = 0; + return -1; + } + sn = strlen_c(sp); + //dp = (unsigned char *) gmp_alloc (sn); + + + value_of_a = (base > 36) ? 36 : 10; + for (dn = 0; *sp; sp++) + { + unsigned digit; + + if (isspace_gpu ((unsigned char) *sp)) + continue; + else if (*sp >= '0' && *sp <= '9') + digit = *sp - '0'; + else if (*sp >= 'a' && *sp <= 'z') + digit = *sp - 'a' + value_of_a; + else if (*sp >= 'A' && *sp <= 'Z') + digit = *sp - 'A' + 10; + else + digit = base; /* fail */ + + if (digit >= (unsigned) base) + { + //gmp_free (dp, sn); + r->_mp_size = 0; + return -1; + } + + dp[dn++] = digit; + } + + if (!dn) + { + //gmp_free (dp, sn); + r->_mp_size = 0; + return -1; + } + bits = mpn_base_power_of_two_p (base); + + if (bits > 0) + { + alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; + rp = MPZ_REALLOC (r, alloc); + rn = mpn_set_str_bits (rp, dp, dn, bits); + } + else + { + struct mpn_base_info info; + mpn_get_base_info (&info, base); + alloc = (dn + info.exp - 1) / info.exp; + rp = MPZ_REALLOC (r, alloc); + rn = mpn_set_str_other (rp, dp, dn, base, &info); + /* Normalization, needed for all-zero input. */ + assert (rn > 0); + rn -= rp[rn-1] == 0; + } + assert (rn <= alloc); + //gmp_free (dp, sn); + + r->_mp_size = sign ? - rn : rn; + + return 0; +} + + + +int +mpz_init_set_str (mpz_t r, const char *sp, int base) +{ + mpz_init (r); + return mpz_set_str (r, sp, base); +} + + +// Montgomery multiplication + +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x + ); + +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni + ); + +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); + +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ); + +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj); + +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); + + + + +void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) { + + mpz_t two; // powj == 2^j + + mpz_init_set_ui(two, 2); + + mp_bitcnt_t j = mpz_scan1(m, 0); + + mpz_tdiv_q_2exp(q,m,j); + mpz_mul_2exp(powj,two,j - 1); + + mpz_clear(two); + +} + +// CPU +void mont_prepare(mpz_t b, mpz_t e, mpz_t m, + mpz_t r, mpz_t r_1, + mpz_t ni, mpz_t M, mpz_t x) { + + // MARK: break this up, reduce the amount of temporary variables + + // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) + + // calculate r, which must be larger than the modulo and also a power of 2 + + mpz_t one, oo; // some helper variables + mpz_init_set_si(one,1); + mpz_init_set_si(oo,0); + + size_t len = mpz_sizeinbase(m,2); + + mpz_mul_2exp(r,one,len); + + mpz_set_si(one, 0); + + + mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni + + int sgn = mpz_sgn(r_1); + + mpz_abs(r_1, r_1); + mpz_abs(ni, ni); + + if (sgn == -1) { + mpz_sub(ni, r, ni); + mpz_sub(r_1, m, r_1); + } + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(one, r, r_1); + mpz_mul(oo,ni,m); + + mpz_sub(one, one, oo); // oo must be one + + if (mpz_cmp_ui(one, 1)) + assert(0); + + mpz_mul(M, b, r); + mpz_mod(M, M, m); // set M + + mpz_mod(x, r, m); // set x + + mpz_clear(one); + mpz_clear(oo); + +} + +// maybe GPU? +// MARK: n MUST be an odd number +void mont_modexp(mpz_t ret, + mpz_t a, mpz_t e, + const mpz_t M, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + mpz_t aa,xx; + + mpz_init_set(aa, M); + mpz_init_set(xx, a); + + int k = (int)mpz_sizeinbase(e,2); + + for (int i = k - 1; i >= 0; i--) { + + mont_product(xx, xx, xx, r, r_1, n, ni); + + if (mpz_tstbit(e, i)) + mont_product(xx, aa, xx, r, r_1, n, ni); + + } + + mpz_set(ret, xx); + + mpz_clear(aa); + mpz_clear(xx); + +} + +void mont_finish(mpz_t ret, + const mpz_t xx, + const mpz_t n, const mpz_t ni, + const mpz_t r, const mpz_t r_1 + ) { + + + mpz_t x,one; + + mpz_init(x); + mpz_init_set_ui(one, 1); + + mont_product(x, xx, one, r, r_1, n, ni); + + mpz_set(ret, x); + + mpz_clear(x); + mpz_clear(one); + +} + + +// GPU +void mont_product(mpz_t ret, + const mpz_t a, const mpz_t b, + const mpz_t r, const mpz_t r_1, + const mpz_t n, const mpz_t ni + ) { + + mpz_t t,m,u; + + mpz_init(t); + mpz_init(m); + mpz_init(u); + + + + mont_mulmod(t, b, a, r); + + mont_mulmod(m, ni, t, r); + + mpz_t ab,mn; + + mpz_init(ab); + mpz_init(mn); + + mpz_mul(ab, a, b); + mpz_mul(mn, m, n); + + mpz_add(ab, ab, mn); + + unsigned long sz = mpz_sizeinbase(r,2) - 1; + mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division + + if (mpz_cmp(u, n) >= 0) + mpz_sub(u, u, n); + + mpz_set(ret, u); + + mpz_clear(ab); + mpz_clear(mn); + mpz_clear(t); + mpz_clear(m); + mpz_clear(u); + +} + +// not the fastest... but it does not increase the variable sizes +void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) { + + mpz_t aa, bb; + mpz_init_set(aa, a); + mpz_init_set(bb,b); + + mpz_mod(aa, aa, mod); // in case a is bigger + + while (mpz_cmp_ui(bb, 0) > 0) { + if (mpz_odd_p(bb)) { + mpz_add(res, res, aa); + mpz_mod(res, res, mod); + } + + mpz_mul_2exp(aa,aa,1); + mpz_mod(aa, aa, mod); + mpz_tdiv_q_2exp(bb, bb, 1); + } +} + + + +void montgomery(const char *signature, + const char *exponent, + const char *modulus, + const char *base, + unsigned long *valid) +{ + + + int radix = 16; + + mpz_t b,e,m,res; + + + + mpz_init(res); + + mpz_init_set_str(b,base,radix); // M + mpz_init_set_str(e,exponent,radix); + mpz_init_set_str(m,modulus,radix); // n + + mpz_t r, r_1, ni, M, x; + mpz_init(r); // MARK: I think I have to destroy these myself + mpz_init(r_1); + mpz_init(ni); + mpz_init(M); + mpz_init(x); + + + mpz_t xx; + mpz_init(xx); + + + + + if (mpz_even_p(m)) { + + mpz_t bb, x1, x2, q, powj; + mpz_init(bb); + mpz_init(x1); + mpz_init(x2); + mpz_init(q); + mpz_init(powj); + + mont_prepare_even_modulus(m, q, powj); + + // q is uneven, so we can use regular modexp + // MARK: we can improve the efficiency here by doing simple reductions + + mpz_mod(bb, b, q); // reductions like this + + mont_prepare(bb, e, q, r, r_1, ni, M, x); + mont_modexp(xx, x, e, M, q, ni, r, r_1); + mont_finish(x1, xx, q, ni, r, r_1); + + + // MARK: we can also reduce and really speed this up as well -> binary method? + mpz_powm(x2, b, e, powj); + + mpz_t y, q_1; + mpz_init(y); + mpz_init(q_1); + + mpz_sub(y, x2, x1); + + mpz_invert(q_1, q, powj); + + mpz_mul(y, y, q_1); + mpz_mod(y, y, powj); + + mpz_addmul(x1, q, y); + + mpz_set(res, x1); + + + + } else { + + mont_prepare(b, e, m, r, r_1, ni, M, x); + + mont_modexp(xx, x, e, M, m, ni, r, r_1); + + mont_finish(res, xx, m, ni, r, r_1); + + + } + + + + + + + + mpz_t sig; + mpz_init_set_str(sig,signature,radix); + + if (mpz_cmp(sig,res) == 0) { + + *valid = 1; + + } else { + + } + + +} diff --git a/source/gmp_GPU.h b/source/gmp_GPU.h @@ -0,0 +1,27 @@ +// +// gmp_GPU.h +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 25.11.2023. +// + +#ifndef gmp_GPU_h +#define gmp_GPU_h + +//#include <stdio.h> + +//#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +void montgomery(const char *signature, + const char *exponent, + const char *modulus, + const char *base, + unsigned long *valid); + +#endif /* gmp_GPU_h */ diff --git a/source/rsa-test.c b/source/rsa-test.c @@ -19,7 +19,9 @@ #include "ctype.h" #include "time.h" -//#include "gmp_GPU.h" + +//#include "gmp.h" + #include "gmp_GPU.h" // //#include "RSA-Montgomery.h" @@ -1097,7 +1099,7 @@ int mont_rsa_tests(void) { pks = mont_pairs_from_files(b, e, m, s, &pairs); - printf("--"); + unsigned long result = 0; @@ -1107,11 +1109,14 @@ int mont_rsa_tests(void) { clock_gettime(CLOCK_REALTIME, &t1); - // montgomery(b, e, m, s, &result); - mont_verify_pairs_with_opencl( - b,e,m,s, + //montgomery(b, e, m, s, &result); + + + + + mont_verify_pairs_with_opencl(s,e,m,b, pairs, pks, &result); clock_gettime(CLOCK_REALTIME, &t2); diff --git a/xcode/.DS_Store b/xcode/.DS_Store Binary files differ. diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj @@ -9,7 +9,7 @@ /* Begin PBXBuildFile section */ 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A36F8882B0F938E00AB772D /* montgomery.cl */; }; 6A8A795F2A89672700116D7D /* verify.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* verify.cl */; }; - 6A99B0672B125F430004E4B7 /* gmp.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A7914CB2B0CF320001EDCC1 /* gmp.c */; }; + 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */; }; 6AA38E5B2B0A97FC00E85243 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AA38E5A2B0A97FC00E85243 /* main.c */; }; 6AD85E072AF71AD900662919 /* big-int-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487D2ADADF4500D58E08 /* big-int-test.c */; }; 6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AD85E0B2AFA510C00662919 /* openssl-test.c */; }; @@ -48,6 +48,8 @@ 6A7914CD2B0CF320001EDCC1 /* montgomery.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = montgomery.c; path = ../source/montgomery.c; sourceTree = "<group>"; }; 6A7914CE2B0CF320001EDCC1 /* gmp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp.h; path = ../source/gmp.h; sourceTree = "<group>"; }; 6A8A795E2A89672700116D7D /* verify.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = verify.cl; sourceTree = "<group>"; }; + 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp_GPU.h; path = ../source/gmp_GPU.h; sourceTree = "<group>"; }; + 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gmp_GPU.c; path = ../source/gmp_GPU.c; sourceTree = "<group>"; }; 6AA38E582B0A97FC00E85243 /* lib-gpu-generate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-generate"; sourceTree = BUILT_PRODUCTS_DIR; }; 6AA38E5A2B0A97FC00E85243 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; }; 6AA38E612B0A9B2100E85243 /* lib-gpu-generate.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "lib-gpu-generate.entitlements"; sourceTree = "<group>"; }; @@ -136,6 +138,8 @@ 6AF748852ADADFAD00D58E08 /* opencl-test.c */, 6A7914CB2B0CF320001EDCC1 /* gmp.c */, 6A7914CD2B0CF320001EDCC1 /* montgomery.c */, + 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */, + 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */, ); name = Sources; sourceTree = "<group>"; @@ -226,8 +230,8 @@ buildActionMask = 2147483647; files = ( 6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */, + 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */, 6AD85E072AF71AD900662919 /* big-int-test.c in Sources */, - 6A99B0672B125F430004E4B7 /* gmp.c in Sources */, 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */, 6A8A795F2A89672700116D7D /* verify.cl in Sources */, 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */, diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate Binary files differ. diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -1262,8 +1262,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "73" - endingLineNumber = "73" + startingLineNumber = "75" + endingLineNumber = "75" landmarkName = "generate_random_pairs(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1278,8 +1278,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "608" - endingLineNumber = "608" + startingLineNumber = "610" + endingLineNumber = "610" landmarkName = "rsa_tests()" landmarkType = "9"> <Locations> @@ -1461,8 +1461,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "595" - endingLineNumber = "595" + startingLineNumber = "597" + endingLineNumber = "597" landmarkName = "verify_pairs_with_opencl(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n, pks, result)" landmarkType = "9"> <Locations> @@ -1554,8 +1554,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "113" - endingLineNumber = "113" + startingLineNumber = "115" + endingLineNumber = "115" landmarkName = "generate_random_pairs(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1570,8 +1570,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "140" - endingLineNumber = "140" + startingLineNumber = "142" + endingLineNumber = "142" landmarkName = "generate_random_pairs(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1586,8 +1586,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "135" - endingLineNumber = "135" + startingLineNumber = "137" + endingLineNumber = "137" landmarkName = "generate_random_pairs(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> <Locations> @@ -1757,8 +1757,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "254" - endingLineNumber = "254" + startingLineNumber = "256" + endingLineNumber = "256" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1773,8 +1773,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "212" - endingLineNumber = "212" + startingLineNumber = "214" + endingLineNumber = "214" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> <Locations> @@ -1866,8 +1866,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "208" - endingLineNumber = "208" + startingLineNumber = "210" + endingLineNumber = "210" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1882,8 +1882,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "247" - endingLineNumber = "247" + startingLineNumber = "249" + endingLineNumber = "249" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1898,8 +1898,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "199" - endingLineNumber = "199" + startingLineNumber = "201" + endingLineNumber = "201" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -1914,8 +1914,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "565" - endingLineNumber = "565" + startingLineNumber = "567" + endingLineNumber = "567" landmarkName = "verify_pairs_with_opencl(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -1930,8 +1930,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "655" - endingLineNumber = "655" + startingLineNumber = "657" + endingLineNumber = "657" landmarkName = "rsa_tests()" landmarkType = "9"> </BreakpointContent> @@ -1946,8 +1946,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "574" - endingLineNumber = "574" + startingLineNumber = "576" + endingLineNumber = "576" landmarkName = "verify_pairs_with_opencl(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -1962,8 +1962,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "640" - endingLineNumber = "640" + startingLineNumber = "642" + endingLineNumber = "642" landmarkName = "rsa_tests()" landmarkType = "9"> <Locations> @@ -2010,8 +2010,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "308" - endingLineNumber = "308" + startingLineNumber = "310" + endingLineNumber = "310" landmarkName = "verify_pairs_with_opencl(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -2026,8 +2026,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "241" - endingLineNumber = "241" + startingLineNumber = "243" + endingLineNumber = "243" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -2042,8 +2042,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "249" - endingLineNumber = "249" + startingLineNumber = "251" + endingLineNumber = "251" landmarkName = "pairs_from_files(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n)" landmarkType = "9"> </BreakpointContent> @@ -2872,8 +2872,8 @@ filePath = "../source/gmp.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1004" - endingLineNumber = "1004" + startingLineNumber = "987" + endingLineNumber = "987" landmarkName = "mpn_div_qr_1_preinv(qp, np, nn, inv)" landmarkType = "9"> </BreakpointContent> @@ -2888,8 +2888,8 @@ filePath = "../source/gmp.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "4219" - endingLineNumber = "4219" + startingLineNumber = "4202" + endingLineNumber = "4202" landmarkName = "mpz_sizeinbase(u, base)" landmarkType = "9"> </BreakpointContent> @@ -2904,8 +2904,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "438" - endingLineNumber = "438" + startingLineNumber = "440" + endingLineNumber = "440" landmarkName = "verify_pairs_with_opencl(bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -2920,8 +2920,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "701" - endingLineNumber = "701" + startingLineNumber = "703" + endingLineNumber = "703" landmarkName = "mont_pairs_from_files(bases, exponents, moduli, signatures, n)" landmarkType = "9"> </BreakpointContent> @@ -2936,8 +2936,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "896" - endingLineNumber = "896" + startingLineNumber = "898" + endingLineNumber = "898" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -2946,14 +2946,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "6A825320-5EF5-46A3-88EB-CBD1ECAB327F" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1050" - endingLineNumber = "1050" + startingLineNumber = "1052" + endingLineNumber = "1052" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> <Locations> @@ -3017,6 +3017,21 @@ endingLineNumber = "1050" offsetFromSymbolStart = "3154"> </Location> + <Location + uuid = "6A825320-5EF5-46A3-88EB-CBD1ECAB327F - 2905f85b306cd353" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "mont_verify_pairs_with_opencl" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1052" + endingLineNumber = "1052" + offsetFromSymbolStart = "3154"> + </Location> </Locations> </BreakpointContent> </BreakpointProxy> @@ -3024,14 +3039,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "A0206612-E01F-4D5F-BA1C-09E3D6085CD0" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1045" - endingLineNumber = "1045" + startingLineNumber = "1047" + endingLineNumber = "1047" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> <Locations> @@ -3108,8 +3123,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "908" - endingLineNumber = "908" + startingLineNumber = "910" + endingLineNumber = "910" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -3124,8 +3139,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "946" - endingLineNumber = "946" + startingLineNumber = "948" + endingLineNumber = "948" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> </BreakpointContent> @@ -3134,14 +3149,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "C8B7770D-BAC1-4203-AF94-A85245DE1081" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "941" - endingLineNumber = "941" + startingLineNumber = "943" + endingLineNumber = "943" landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)" landmarkType = "9"> <Locations> @@ -3188,8 +3203,8 @@ filePath = "../source/gmp.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1000" - endingLineNumber = "1000" + startingLineNumber = "983" + endingLineNumber = "983" landmarkName = "mpn_div_qr_1_preinv(qp, np, nn, inv)" landmarkType = "9"> </BreakpointContent> @@ -3204,8 +3219,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1110" - endingLineNumber = "1110" + startingLineNumber = "1114" + endingLineNumber = "1114" landmarkName = "mont_rsa_tests()" landmarkType = "9"> </BreakpointContent> @@ -3220,8 +3235,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "495" - endingLineNumber = "495" + startingLineNumber = "490" + endingLineNumber = "490" landmarkName = "mpz_set(r, x)" landmarkType = "9"> </BreakpointContent> @@ -3236,8 +3251,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "492" - endingLineNumber = "492" + startingLineNumber = "487" + endingLineNumber = "487" landmarkName = "mpz_set(r, x)" landmarkType = "9"> </BreakpointContent> @@ -3252,8 +3267,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2802" - endingLineNumber = "2802" + startingLineNumber = "2797" + endingLineNumber = "2797" landmarkName = "mont_mulmod(res, a, b, mod)" landmarkType = "9"> </BreakpointContent> @@ -3262,14 +3277,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "D1044013-3029-4CCB-9700-A4ED809E7749" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2858" - endingLineNumber = "2858" + startingLineNumber = "2853" + endingLineNumber = "2853" landmarkName = "montgomery(signature, exponent, modulus, base, valid)" landmarkType = "9"> <Locations> @@ -3346,8 +3361,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2903" - endingLineNumber = "2903" + startingLineNumber = "2898" + endingLineNumber = "2898" landmarkName = "montgomery(signature, exponent, modulus, base, valid)" landmarkType = "9"> </BreakpointContent> @@ -3362,8 +3377,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2901" - endingLineNumber = "2901" + startingLineNumber = "2896" + endingLineNumber = "2896" landmarkName = "montgomery(signature, exponent, modulus, base, valid)" landmarkType = "9"> </BreakpointContent> @@ -3378,8 +3393,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2669" - endingLineNumber = "2669" + startingLineNumber = "2664" + endingLineNumber = "2664" landmarkName = "mont_prepare(b, e, m, r, r_1, ni, M, x)" landmarkType = "9"> <Locations> @@ -3441,8 +3456,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "836" - endingLineNumber = "836" + startingLineNumber = "831" + endingLineNumber = "831" landmarkName = "mpz_div_qr(q, r, n, d, mode)" landmarkType = "9"> <Locations> @@ -3504,8 +3519,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "907" - endingLineNumber = "907" + startingLineNumber = "902" + endingLineNumber = "902" landmarkName = "mpz_div_qr(q, r, n, d, mode)" landmarkType = "9"> <Locations> @@ -3567,8 +3582,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "1033" - endingLineNumber = "1033" + startingLineNumber = "1028" + endingLineNumber = "1028" landmarkName = "mpn_div_qr_invert(inv, dp, dn)" landmarkType = "9"> <Locations> @@ -3630,8 +3645,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "948" - endingLineNumber = "948" + startingLineNumber = "943" + endingLineNumber = "943" landmarkName = "mpn_div_qr(qp, np, nn, dp, dn)" landmarkType = "9"> <Locations> @@ -3708,8 +3723,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2840" - endingLineNumber = "2840" + startingLineNumber = "2835" + endingLineNumber = "2835" landmarkName = "montgomery(signature, exponent, modulus, base, valid)" landmarkType = "9"> <Locations> @@ -3771,8 +3786,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2586" - endingLineNumber = "2586" + startingLineNumber = "2581" + endingLineNumber = "2581" landmarkName = "mpz_set_str(r, sp, base)" landmarkType = "9"> </BreakpointContent> @@ -3787,8 +3802,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "856" - endingLineNumber = "856" + startingLineNumber = "851" + endingLineNumber = "851" landmarkName = "mpz_div_qr(q, r, n, d, mode)" landmarkType = "9"> </BreakpointContent> @@ -3803,8 +3818,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2213" - endingLineNumber = "2213" + startingLineNumber = "2208" + endingLineNumber = "2208" landmarkName = "mpz_gcdext(g, s, t, u, v)" landmarkType = "9"> </BreakpointContent> @@ -3819,8 +3834,8 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2664" - endingLineNumber = "2664" + startingLineNumber = "2659" + endingLineNumber = "2659" landmarkName = "mont_prepare(b, e, m, r, r_1, ni, M, x)" landmarkType = "9"> </BreakpointContent> @@ -3835,10 +3850,57 @@ filePath = "../source/gmp_GPU.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "2919" - endingLineNumber = "2919" + startingLineNumber = "2914" + endingLineNumber = "2914" landmarkName = "montgomery(signature, exponent, modulus, base, valid)" landmarkType = "9"> + <Locations> + <Location + uuid = "F1B8A13D-2350-47BF-BCEB-CDEAD7827EA9 - 941e65ef5d451925" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "montgomery" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/gmp_GPU.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2916" + endingLineNumber = "2916" + offsetFromSymbolStart = "990"> + </Location> + <Location + uuid = "F1B8A13D-2350-47BF-BCEB-CDEAD7827EA9 - 941e65ef5d451906" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "montgomery" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/gmp_GPU.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2915" + endingLineNumber = "2915" + offsetFromSymbolStart = "1009"> + </Location> + <Location + uuid = "F1B8A13D-2350-47BF-BCEB-CDEAD7827EA9 - 941e65ef5d451967" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "montgomery" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/gmp_GPU.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2914" + endingLineNumber = "2914" + offsetFromSymbolStart = "990"> + </Location> + </Locations> </BreakpointContent> </BreakpointProxy> <BreakpointProxy @@ -3851,11 +3913,27 @@ filePath = "../source/gmp.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "999" - endingLineNumber = "999" + startingLineNumber = "982" + endingLineNumber = "982" landmarkName = "mpn_div_qr_1_preinv(qp, np, nn, inv)" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "E3DBFC38-2984-402B-8763-F6B5E1731C25" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "../source/gmp_GPU.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1982" + endingLineNumber = "1982" + landmarkName = "mpn_common_scan(limb, i, up, un, ux)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> </Breakpoints> </Bucket> diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl @@ -199,7 +199,7 @@ } while (0) -#define assert(x){} +#define assert(x){if((x)==0){printf((char __constant *)"assert reached\n");}} #define NULL ((void*)0) @@ -467,7 +467,7 @@ mpz_init (mpz_t r) r->_mp_alloc = 0; r->_mp_size = 0; - // memset(r->_mp_d, 0, 256); + memset(r->_mp_d, 0, 256); // r->_mp_d = (mp_ptr) &dummy_limb; } @@ -840,7 +840,9 @@ mpz_div_qr (mpz_t q, mpz_t r, ns = n->_mp_size; ds = d->_mp_size; - if (ds == 0) {} + if (ds == 0) { + + } //gmp_die("mpz_div_qr: Divide by zero."); if (ns == 0) @@ -952,6 +954,8 @@ mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) mpz_t tp; + + assert (dn > 0); assert (nn >= dn); @@ -1928,7 +1932,7 @@ mpz_sizeinbase (const mpz_t u, int base) mpz_init(tp); assert (base >= 2); - assert (base <= 62); +assert (base <= 62); un = GMP_ABS (u->_mp_size); if (un == 0) @@ -2204,7 +2208,7 @@ mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) uz = mpz_make_odd (tu); mpz_abs (tv, v); vz = mpz_make_odd (tv); - gz = GMP_MIN (uz, vz); +gz = GMP_MIN (uz, vz); uz -= gz; vz -= gz; @@ -2487,7 +2491,7 @@ mpz_set_str (mpz_t r, __constant char *sp, int base) mp_ptr rp; size_t dn, sn; int sign; - unsigned char dp[4096]; + unsigned char dp[256]; assert (base == 0 || (base >= 2 && base <= 62)); @@ -2659,7 +2663,9 @@ void mont_prepare(mpz_t b, mpz_t e, mpz_t m, mpz_init_set_si(one,1); mpz_init_set_si(oo,0); - size_t len = mpz_sizeinbase(m,2); + //unsigned long len = mpz_sizeinbase(m,2); + + unsigned long len = 2049; mpz_mul_2exp(r,one,len); @@ -2819,7 +2825,16 @@ void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) { } } - +void printmpz(mpz_t n) { + + for (int i = 0; i < n->_mp_size; i++) { + + printf((char __constant *)"%lu", n->_mp_d[i]); + + } + printf((char __constant *)"\n\n"); + +} __kernel void montgomery(__constant char *signature, __constant char *exponent, @@ -2829,100 +2844,123 @@ __kernel void montgomery(__constant char *signature, { - int radix = 16; - - mpz_t b,e,m,res; - - - - mpz_init(res); - - mpz_init_set_str(b,base,radix); // M - mpz_init_set_str(e,exponent,radix); - mpz_init_set_str(m,modulus,radix); // n - - mpz_t r, r_1, ni, M, x; - mpz_init(r); // MARK: I think I have to destroy these myself - mpz_init(r_1); - mpz_init(ni); - mpz_init(M); - mpz_init(x); - - - mpz_t xx; - mpz_init(xx); - + int index = get_global_id(0); - - - if (mpz_even_p(m)) { + if (index == 0) { - mpz_t bb, x1, x2, q, powj; - mpz_init(bb); - mpz_init(x1); - mpz_init(x2); - mpz_init(q); - mpz_init(powj); - mont_prepare_even_modulus(m, q, powj); + int radix = 16; - // q is uneven, so we can use regular modexp - // MARK: we can improve the efficiency here by doing simple reductions + mpz_t b,e,m,res; - mpz_mod(bb, b, q); // reductions like this - mont_prepare(bb, e, q, r, r_1, ni, M, x); - mont_modexp(xx, x, e, M, q, ni, r, r_1); - mont_finish(x1, xx, q, ni, r, r_1); - // MARK: we can also reduce and really speed this up as well -> binary method? - mpz_powm(x2, b, e, powj); + mpz_init(res); + + mpz_init_set_str(b,base,radix); // M + mpz_init_set_str(e,exponent,radix); + mpz_init_set_str(m,modulus,radix); // n - mpz_t y, q_1; - mpz_init(y); - mpz_init(q_1); + mpz_t r, r_1, ni, M, x; + mpz_init(r); // MARK: I think I have to destroy these myself + mpz_init(r_1); + mpz_init(ni); + mpz_init(M); + mpz_init(x); - mpz_sub(y, x2, x1); - mpz_invert(q_1, q, powj); + mpz_t xx; + mpz_init(xx); - mpz_mul(y, y, q_1); - mpz_mod(y, y, powj); - mpz_addmul(x1, q, y); + + printf((char __constant *)"%lu\n",GMP_LIMB_BITS); - mpz_set(res, x1); + + if (mpz_even_p(m)) { + + mpz_t bb, x1, x2, q, powj; + mpz_init(bb); + mpz_init(x1); + mpz_init(x2); + mpz_init(q); + mpz_init(powj); + + mont_prepare_even_modulus(m, q, powj); + + // q is uneven, so we can use regular modexp + // MARK: we can improve the efficiency here by doing simple reductions + + mpz_mod(bb, b, q); // reductions like this + + mont_prepare(bb, e, q, r, r_1, ni, M, x); + mont_modexp(xx, x, e, M, q, ni, r, r_1); + mont_finish(x1, xx, q, ni, r, r_1); + + + // MARK: we can also reduce and really speed this up as well -> binary method? + mpz_powm(x2, b, e, powj); + + mpz_t y, q_1; + mpz_init(y); + mpz_init(q_1); + + mpz_sub(y, x2, x1); + + mpz_invert(q_1, q, powj); + + mpz_mul(y, y, q_1); + mpz_mod(y, y, powj); + + mpz_addmul(x1, q, y); + + mpz_set(res, x1); + + + + } else { + + + + mont_prepare(b, e, m, r, r_1, ni, M, x); + + + mont_modexp(xx, x, e, M, m, ni, r, r_1); + + // printf((char __constant *)"--\n"); + + //printmpz(xx); + + mont_finish(res, xx, m, ni, r, r_1); + + // printf((char __constant *)"--\n"); + + // printmpz(res); + + + } - } else { - - mont_prepare(b, e, m, r, r_1, ni, M, x); - mont_modexp(xx, x, e, M, m, ni, r, r_1); - mont_finish(res, xx, m, ni, r, r_1); + + + + mpz_t sig; + mpz_init_set_str(sig,signature,radix); + + if (mpz_cmp(sig,res) == 0) { + + *valid = 1; + + } else { - } - - - - - - - - mpz_t sig; - mpz_init_set_str(sig,signature,radix); - - if (mpz_cmp(sig,res) == 0) { + } - *valid = 1; - } else { - } - }