commit b1f4f32728669766e1e7121856f779a97ef01f50
parent e496a5a3755148a391d018e3174401a8f894ea9b
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date: Sun, 26 Nov 2023 00:30:04 +0100
Batch verification with montgomery kernel
Diffstat:
10 files changed, 385 insertions(+), 3196 deletions(-)
diff --git a/.DS_Store b/.DS_Store
Binary files differ.
diff --git a/source/gmp_GPU.c b/source/gmp_GPU.c
@@ -1,2923 +0,0 @@
-//
-// gmp_GPU.c
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 25.11.2023.
-//
-
-#include "gmp_GPU.h"
-
-#define MINI_GMP_LIMB_TYPE long
-
-#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
-
-#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
-#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
-
-#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
-#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)
-
-#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
-#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))
-
-#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
-#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
-
-#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
-#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))
-
-#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \
- ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
-
-
-#define gmp_clz(count, x) do { \
- mp_limb_t __clz_x = (x); \
- unsigned __clz_c = 0; \
- int LOCAL_SHIFT_BITS = 8; \
- if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \
- for (; \
- (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
- __clz_c += 8) \
- { __clz_x <<= LOCAL_SHIFT_BITS; } \
- for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \
- __clz_x <<= 1; \
- (count) = __clz_c; \
- } while (0)
-
-#define gmp_umullo_limb(u, v) \
- ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v))
-
-#define gmp_umul_ppmm(w1, w0, u, v) \
- do { \
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \
- if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned int __ww = (unsigned int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned long int __ww = (unsigned long int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else { \
- mp_limb_t __x0, __x1, __x2, __x3; \
- unsigned __ul, __vl, __uh, __vh; \
- mp_limb_t __u = (u), __v = (v); \
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \
- \
- __ul = __u & GMP_LLIMB_MASK; \
- __uh = __u >> (GMP_LIMB_BITS / 2); \
- __vl = __v & GMP_LLIMB_MASK; \
- __vh = __v >> (GMP_LIMB_BITS / 2); \
- \
- __x0 = (mp_limb_t) __ul * __vl; \
- __x1 = (mp_limb_t) __ul * __vh; \
- __x2 = (mp_limb_t) __uh * __vl; \
- __x3 = (mp_limb_t) __uh * __vh; \
- \
- __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
- __x1 += __x2; /* but this indeed can */ \
- if (__x1 < __x2) /* did we get it? */ \
- __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \
- \
- (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
- (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
- } \
- } while (0)
-
-#define gmp_assert_nocarry(x) do { \
- mp_limb_t __cy = (x); \
- assert (__cy == 0); \
- (void) (__cy); \
- } while (0)
-
-#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) + (bl); \
- (sh) = (ah) + (bh) + (__x < (al)); \
- (sl) = __x; \
- } while (0)
-
-#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) - (bl); \
- (sh) = (ah) - (bh) - ((al) < (bl)); \
- (sl) = __x; \
- } while (0)
-
-
-#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _qh, _ql, _r, _mask; \
- gmp_umul_ppmm (_qh, _ql, (nh), (di)); \
- gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - gmp_umullo_limb (_qh, (d)); \
- _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
- _qh += _mask; \
- _r += _mask & (d); \
- if (_r >= (d)) \
- { \
- _r -= (d); \
- _qh++; \
- } \
- \
- (r) = _r; \
- (q) = _qh; \
- } while (0)
-
-#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
- do { \
- mp_limb_t _q0, _t1, _t0, _mask; \
- gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \
- gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
- \
- /* Compute the two most significant limbs of n - q'd */ \
- (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
- gmp_umul_ppmm (_t1, _t0, (d0), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
- (q)++; \
- \
- /* Conditionally adjust q and the remainders */ \
- _mask = - (mp_limb_t) ((r1) >= _q0); \
- (q) += _mask; \
- gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
- if ((r1) >= (d1)) \
- { \
- if ((r1) > (d1) || (r0) >= (d0)) \
- { \
- (q)++; \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
- } \
- } \
- } while (0)
-
-#define gmp_ctz(count, x) do { \
- mp_limb_t __ctz_x = (x); \
- unsigned __ctz_c = 0; \
- gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \
- (count) = GMP_LIMB_BITS - 1 - __ctz_c; \
- } while (0)
-
-
-#define MPZ_SRCPTR_SWAP(x, y) \
- do { \
- mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_srcptr_swap__tmp; \
- } while (0)
-
-#define MP_SIZE_T_SWAP(x, y) \
- do { \
- mp_size_t __mp_size_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_size_t_swap__tmp; \
- } while (0)
-
-#define MPZ_PTR_SWAP(x, y) \
- do { \
- mpz_ptr __mpz_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_ptr_swap__tmp; \
- } while (0)
-
-#define MP_BITCNT_T_SWAP(x,y) \
- do { \
- mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_bitcnt_t_swap__tmp; \
- } while (0)
-
-
-#define assert(x){if((x)==0){printf("assert reached\n");}}
-
-typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t;
-typedef long mp_size_t;
-typedef unsigned long mp_bitcnt_t;
-
-typedef mp_limb_t *mp_ptr;
-typedef const mp_limb_t *mp_srcptr;
-
-typedef struct
-{
- int _mp_alloc; /* Number of *limbs* allocated and pointed
- to by the _mp_d field. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- //mp_limb_t *_mp_d; /* Pointer to the limbs. */
-
- mp_limb_t _mp_d[256];
-
-} __mpz_struct;
-
-typedef __mpz_struct mpz_t[1];
-
-typedef __mpz_struct *mpz_ptr;
-
-typedef const __mpz_struct *mpz_srcptr;
-
-struct gmp_div_inverse
-{
- /* Normalization shift count. */
- unsigned shift;
- /* Normalized divisor (d0 unused for mpn_div_qr_1) */
- mp_limb_t d1, d0;
- /* Inverse, for 2/1 or 3/2. */
- mp_limb_t di;
-};
-
-
-struct mpn_base_info
-{
- /* bb is the largest power of the base which fits in one limb, and
- exp is the corresponding exponent. */
- unsigned exp;
- mp_limb_t bb;
-};
-
-
-enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
-
-void mpz_init (mpz_t r);
-void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n);
-void mpz_set (mpz_t r, const mpz_t x);
-void
-mpz_set (mpz_t r, const mpz_t x);
-void
-mpz_set_ui (mpz_t r, unsigned long int x);
-void
-mpz_set_si (mpz_t r, signed long int x);
-void
-mpz_init_set_si (mpz_t r, signed long int x);
-void
-mpz_init_set (mpz_t r, const mpz_t x);
-void
-mpz_init2 (mpz_t r, mp_bitcnt_t bits);
-void
-mpz_init_set_ui (mpz_t r, unsigned long int x);
-void
-mpz_clear (mpz_t r);
-void
-gmp_die (const char *msg);
-
-
-mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n);
-void
-mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b);
-void
-mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b);
-void
-mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b);
-int
-mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un);
-unsigned long int
-mpz_get_ui (const mpz_t u);
-int
-mpz_cmpabs_ui (const mpz_t u, unsigned long v);
-mp_limb_t
-mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b);
-mp_limb_t
-mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-mp_limb_t
-mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_limb_t
-mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0);
-int
-mpz_div_qr (mpz_t q, mpz_t r,
- const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode);
-void
-mpz_mod (mpz_t r, const mpz_t n, const mpz_t d);
-void
-mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d);
-
-void
-mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
- mp_limb_t d1, mp_limb_t d0);
-
-void
-mpn_div_qr_invert (struct gmp_div_inverse *inv,
- mp_srcptr dp, mp_size_t dn);
-int
-mpz_cmp_ui (const mpz_t u, unsigned long v);
-int
-mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-mp_limb_t
-mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt);
-mp_limb_t
-mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt);
-int
-mpz_invert (mpz_t r, const mpz_t u, const mpz_t m);
-mp_limb_t
-mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv);
-mp_limb_t
-mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-void
-mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv);
-mp_limb_t
-mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-void
-mpn_div_qr_pi1 (mp_ptr qp,
- mp_ptr np, mp_size_t nn, mp_limb_t n1,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv);
-void
-mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- const struct gmp_div_inverse *inv);
-void
-mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m);
-int
-mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_size_t
-mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b);
-mp_limb_t
-mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b);
-mp_limb_t
-mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_size_t
-mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b);
-void
-mpz_sub (mpz_t r, const mpz_t a, const mpz_t b);
-mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-mp_limb_t
-mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-mp_limb_t
-mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn);
-void
-mpz_mul (mpz_t r, const mpz_t u, const mpz_t v);
-void
-mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n);
-void
-mpn_zero (mp_ptr rp, mp_size_t n);
-void
-mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits);
-int
-mpn_zero_p(mp_srcptr rp, mp_size_t n);
-void
-mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode);
-void
-mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt);
-int
-mpz_cmp (const mpz_t a, const mpz_t b);
-void
-mpz_add (mpz_t r, const mpz_t a, const mpz_t b);
-int
-mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index);
-mp_bitcnt_t
-mpn_limb_size_in_base_2 (mp_limb_t u);
-size_t
-mpz_sizeinbase (const mpz_t u, int base);
-int
-mpz_sgn (const mpz_t u);
-mp_bitcnt_t
-mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
- mp_limb_t ux);
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit);
-mp_bitcnt_t
-mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit);
-mp_bitcnt_t
-mpz_make_odd (mpz_t r);
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d);
-void
-mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_setbit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d);
-int
-mpz_cmpabs (const mpz_t u, const mpz_t v);
-void
-mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v);
-void
-mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v);
-
-unsigned
-mpn_base_power_of_two_p (unsigned b);
-void
-mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b);
-int isspace_gpu(unsigned char c);
-int strlen_c(const char *c);
-mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
- unsigned bits);
-mp_size_t
-mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
- mp_limb_t b, const struct mpn_base_info *info);
-int
-mpz_set_str (mpz_t r, const char *sp, int base);
-int
-mpz_init_set_str (mpz_t r, const char *sp, int base);
-
-//void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b);
-////void mpz_add (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_abs (mpz_t, const mpz_t);
-
-void mpz_neg (mpz_t, const mpz_t);
-void mpz_swap (mpz_t, mpz_t);
-//void mpz_mod (mpz_t, const mpz_t, const mpz_t);
-//
-////int mpz_sgn (const mpz_t);
-//
-////void mpz_mul (mpz_t, const mpz_t, const mpz_t);
-//void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-//
-//void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
-////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
-//
-void mpz_addmul (mpz_t, const mpz_t, const mpz_t);
-//
-//int mpz_tstbit (const mpz_t, mp_bitcnt_t);
-//
-//int mpz_cmp_ui (const mpz_t u, unsigned long v);
-//
-void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn);
-//
-//mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
-
-#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
-
-#define MPZ_REALLOC(z,n) (z)->_mp_d
-
-void
-mpz_init (mpz_t r)
-{
- const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;
-
- r->_mp_alloc = 0;
- r->_mp_size = 0;
-
- // memset(r->_mp_d, 0, 256);
-
- // r->_mp_d = (mp_ptr) &dummy_limb;
-}
-
-void
-mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- mp_size_t i;
- for (i = 0; i < n; i++)
- d[i] = s[i];
-}
-
-void
-mpz_set (mpz_t r, const mpz_t x)
-{
- /* Allow the NOP r == x */
- if (r != x)
- {
- mp_size_t n;
- mp_ptr rp;
-
- n = GMP_ABS (x->_mp_size);
- rp = MPZ_REALLOC (r, n);
-
- mpn_copyi (rp, x->_mp_d, n);
- r->_mp_size = x->_mp_size;
- }
-}
-
-
-void
-mpz_set_ui (mpz_t r, unsigned long int x)
-{
- if (x > 0)
- {
- r->_mp_size = 1;
- MPZ_REALLOC (r, 1)[0] = x;
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- while (x >>= LOCAL_GMP_LIMB_BITS)
- {
- ++ r->_mp_size;
- MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
- }
- }
- }
- else
- r->_mp_size = 0;
-}
-
-
-void
-mpz_neg (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = -r->_mp_size;
-}
-
-
-void
-mpz_set_si (mpz_t r, signed long int x)
-{
- if (x >= 0)
- mpz_set_ui (r, x);
- else /* (x < 0) */
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
- mpz_neg (r, r);
- }
- else
- {
- r->_mp_size = -1;
- MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
- }
-}
-
-void
-mpz_init_set_si (mpz_t r, signed long int x)
-{
- mpz_init (r);
- mpz_set_si (r, x);
-}
-
-
-void
-mpz_init_set (mpz_t r, const mpz_t x)
-{
- mpz_init (r);
- mpz_set (r, x);
-}
-
-void
-mpz_init2 (mpz_t r, mp_bitcnt_t bits)
-{
- mp_size_t rn;
-
- bits -= (bits != 0); /* Round down, except if 0 */
- rn = 1 + bits / GMP_LIMB_BITS;
-
- r->_mp_alloc = rn;
- r->_mp_size = 0;
- // r->_mp_d = gmp_alloc_limbs (rn);
-}
-
-void
-mpz_init_set_ui (mpz_t r, unsigned long int x)
-{
- mpz_init (r);
- mpz_set_ui (r, x);
-}
-
-void
-mpz_clear (mpz_t r)
-{
- //if (r->_mp_alloc)
- //gmp_free_limbs (r->_mp_d, r->_mp_alloc);
-}
-
-
-void
-gmp_die (const char *msg)
-{
- //fprintf (stderr, "%s\n", msg);
- //abort();
-}
-
-mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n)
-{
- while (n > 0 && xp[n-1] == 0)
- --n;
- return n;
-}
-
-void
-mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_t bb;
- mpz_init_set_ui (bb, b);
- mpz_add (r, a, bb);
- mpz_clear (bb);
-}
-
-void
-mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
-{
- mpz_neg (r, b);
- mpz_add_ui (r, r, a);
-}
-
-
-void
-mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_ui_sub (r, b, a);
- mpz_neg (r, r);
-}
-
-int
-mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
-{
- int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
- mp_limb_t ulongrem = 0;
-
- if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
- ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1;
-
- return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
-}
-
-unsigned long int
-mpz_get_ui (const mpz_t u)
-{
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- unsigned long r = 0;
- mp_size_t n = GMP_ABS (u->_mp_size);
- n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
- while (--n >= 0)
- r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
- return r;
- }
-
- return u->_mp_size == 0 ? 0 : u->_mp_d[0];
-}
-
-int
-mpz_cmpabs_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t un = GMP_ABS (u->_mp_size);
-
- if (! mpn_absfits_ulong_p (u->_mp_d, un))
- return 1;
- else
- {
- unsigned long uu = mpz_get_ui (u);
- return GMP_CMP(uu, v);
- }
-}
-
-mp_limb_t
-mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
-
- i = 0;
- do
- {
- mp_limb_t a = ap[i];
- /* Carry out */
- mp_limb_t cy = a < b;
- rp[i] = a - b;
- b = cy;
- }
- while (++i < n);
-
- return b;
-}
-
-mp_limb_t
-mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b;
- a = ap[i]; b = bp[i];
- b += cy;
- cy = (b < cy);
- cy += (a < b);
- rp[i] = a - b;
- }
- return cy;
-}
-
-mp_limb_t
-mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_sub_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-
-mp_limb_t
-mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
-{
- mp_limb_t r, m;
-
- {
- mp_limb_t p, ql;
- unsigned ul, uh, qh;
-
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t));
- /* For notation, let b denote the half-limb base, so that B = b^2.
- Split u1 = b uh + ul. */
- ul = u1 & GMP_LLIMB_MASK;
- uh = u1 >> (GMP_LIMB_BITS / 2);
-
- /* Approximation of the high half of quotient. Differs from the 2/1
- inverse of the half limb uh, since we have already subtracted
- u0. */
- qh = (u1 ^ GMP_LIMB_MAX) / uh;
-
- /* Adjust to get a half-limb 3/2 inverse, i.e., we want
-
- qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
- = floor( (b (~u) + b-1) / u),
-
- and the remainder
-
- r = b (~u) + b-1 - qh (b uh + ul)
- = b (~u - qh uh) + b-1 - qh ul
-
- Subtraction of qh ul may underflow, which implies adjustments.
- But by normalization, 2 u >= B > qh ul, so we need to adjust by
- at most 2.
- */
-
- r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
-
- p = (mp_limb_t) qh * ul;
- /* Adjustment steps taken from udiv_qrnnd_c */
- if (r < p)
- {
- qh--;
- r += u1;
- if (r >= u1) /* i.e. we didn't get carry when adding to r */
- if (r < p)
- {
- qh--;
- r += u1;
- }
- }
- r -= p;
-
- /* Low half of the quotient is
-
- ql = floor ( (b r + b-1) / u1).
-
- This is a 3/2 division (on half-limbs), for which qh is a
- suitable inverse. */
-
- p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
- /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
- work, it is essential that ql is a full mp_limb_t. */
- ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
-
- /* By the 3/2 trick, we don't need the high half limb. */
- r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
-
- if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
- {
- ql--;
- r += u1;
- }
- m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
- if (r >= u1)
- {
- m++;
- r -= u1;
- }
- }
-
- /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
- 3/2 inverse. */
- if (u0 > 0)
- {
- mp_limb_t th, tl;
- r = ~r;
- r += u0;
- if (r < u0)
- {
- m--;
- if (r >= u1)
- {
- m--;
- r -= u1;
- }
- r -= u1;
- }
- gmp_umul_ppmm (th, tl, u0, m);
- r += th;
- if (r < th)
- {
- m--;
- m -= ((r > u1) | ((r == u1) & (tl > u0)));
- }
- }
-
- return m;
-}
-
-int
-mpz_div_qr (mpz_t q, mpz_t r,
- const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
-{
- mp_size_t ns, ds, nn, dn, qs;
- ns = n->_mp_size;
- ds = d->_mp_size;
-
- if (ds == 0) {}
- //gmp_die("mpz_div_qr: Divide by zero.");
-
- if (ns == 0)
- {
- if (q)
- q->_mp_size = 0;
- if (r)
- r->_mp_size = 0;
- return 0;
- }
-
- nn = GMP_ABS (ns);
- dn = GMP_ABS (ds);
-
- qs = ds ^ ns;
-
- if (nn < dn)
- {
- if (mode == GMP_DIV_CEIL && qs >= 0)
- {
- /* q = 1, r = n - d */
- if (r)
- mpz_sub (r, n, d);
- if (q)
- mpz_set_ui (q, 1);
- }
- else if (mode == GMP_DIV_FLOOR && qs < 0)
- {
- /* q = -1, r = n + d */
- if (r)
- mpz_add (r, n, d);
- if (q)
- mpz_set_si (q, -1);
- }
- else
- {
- /* q = 0, r = d */
- if (r)
- mpz_set (r, n);
- if (q)
- q->_mp_size = 0;
- }
- return 1;
- }
- else
- {
- mp_ptr np, qp;
- mp_size_t qn, rn;
- mpz_t tq, tr;
-
- mpz_init_set (tr, n);
- np = tr->_mp_d;
-
- qn = nn - dn + 1;
-
- if (q)
- {
- mpz_init2 (tq, qn * GMP_LIMB_BITS);
- qp = tq->_mp_d;
- }
- else
- qp = NULL;
-
- mpn_div_qr (qp, np, nn, d->_mp_d, dn);
-
- if (qp)
- {
- qn -= (qp[qn-1] == 0);
-
- tq->_mp_size = qs < 0 ? -qn : qn;
- }
- rn = mpn_normalized_size (np, dn);
- tr->_mp_size = ns < 0 ? - rn : rn;
-
- if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
- {
- if (q)
- mpz_sub_ui (tq, tq, 1);
- if (r)
- mpz_add (tr, tr, d);
- }
- else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
- {
- if (q)
- mpz_add_ui (tq, tq, 1);
- if (r)
- mpz_sub (tr, tr, d);
- }
-
- if (q)
- {
- mpz_swap (tq, q);
- mpz_clear (tq);
- }
- if (r)
- mpz_swap (tr, r);
-
- mpz_clear (tr);
-
- return rn != 0;
- }
-}
-
-void
-mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
-{
- struct gmp_div_inverse inv;
- // mp_ptr tp = NULL;
-
- mpz_t tp;
-
- assert (dn > 0);
- assert (nn >= dn);
-
- mpn_div_qr_invert (&inv, dp, dn);
- if (dn > 2 && inv.shift > 0)
- {
- //tp = gmp_alloc_limbs (dn);
- gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift));
- dp = tp->_mp_d;
- }
- mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
- if (tp) {}
- //gmp_free_limbs (tp, dn);
-}
-
-void
-mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mpz_t t;
- mpz_init (t);
- mpz_mul (t, u, v);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-void
-mpz_swap (mpz_t u, mpz_t v)
-{
- //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
- //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size);
-
- mpz_t temp;
- mpz_init(temp);
-
- *temp = *u;
- *u = *v;
- *v = *temp;
-
-}
-
-void
-mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
-}
-
-void
-mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
-{
- unsigned shift;
-
- assert (d > 0);
- gmp_clz (shift, d);
- inv->shift = shift;
- inv->d1 = d << shift;
- inv->di = mpn_invert_limb (inv->d1);
-}
-
-void
-mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
- mp_limb_t d1, mp_limb_t d0)
-{
- unsigned shift;
-
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 <<= shift;
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
-}
-
-void
-mpn_div_qr_invert (struct gmp_div_inverse *inv,
- mp_srcptr dp, mp_size_t dn)
-{
- assert (dn > 0);
-
- if (dn == 1)
- mpn_div_qr_1_invert (inv, dp[0]);
- else if (dn == 2)
- mpn_div_qr_2_invert (inv, dp[1], dp[0]);
- else
- {
- unsigned shift;
- mp_limb_t d1, d0;
-
- d1 = dp[dn-1];
- d0 = dp[dn-2];
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
- }
-}
-
-
-int
-mpz_cmp_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t usize = u->_mp_size;
-
- if (usize < 0)
- return -1;
- else
- return mpz_cmpabs_ui (u, v);
-}
-
-int
-mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- while (--n >= 0)
- {
- if (ap[n] != bp[n])
- return ap[n] > bp[n] ? 1 : -1;
- }
- return 0;
-}
-
-mp_limb_t
-mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- up += n;
- rp += n;
-
- tnc = GMP_LIMB_BITS - cnt;
- low_limb = *--up;
- retval = low_limb >> tnc;
- high_limb = (low_limb << cnt);
-
- while (--n != 0)
- {
- low_limb = *--up;
- *--rp = high_limb | (low_limb >> tnc);
- high_limb = (low_limb << cnt);
- }
- *--rp = high_limb;
-
- return retval;
-}
-
-mp_limb_t
-mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- tnc = GMP_LIMB_BITS - cnt;
- high_limb = *up++;
- retval = (high_limb << tnc);
- low_limb = high_limb >> cnt;
-
- while (--n != 0)
- {
- high_limb = *up++;
- *rp++ = low_limb | (high_limb << tnc);
- low_limb = high_limb >> cnt;
- }
- *rp = low_limb;
-
- return retval;
-}
-
-int
-mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
-{
- mpz_t g, tr;
- int invertible;
-
- if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
- return 0;
-
- mpz_init (g);
- mpz_init (tr);
-
- mpz_gcdext (g, tr, NULL, u, m);
- invertible = (mpz_cmp_ui (g, 1) == 0);
-
- if (invertible)
- {
- if (tr->_mp_size < 0)
- {
- if (m->_mp_size >= 0)
- mpz_add (tr, tr, m);
- else
- mpz_sub (tr, tr, m);
- }
- mpz_swap (r, tr);
- }
-
- mpz_clear (g);
- mpz_clear (tr);
- return invertible;
-}
-
-/* Not matching current public gmp interface, rather corresponding to
- the sbpi1_div_* functions. */
-mp_limb_t
-mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- mp_limb_t d, di;
- mp_limb_t r;
- mp_ptr tp = NULL;
- mp_size_t tn = 0;
-
- if (inv->shift > 0)
- {
- /* Shift, reusing qp area if possible. In-place shift if qp == np. */
- tp = qp;
- if (!tp)
- {
- tn = nn;
-
- // tp = gmp_alloc_limbs (tn);
- }
- r = mpn_lshift (tp, np, nn, inv->shift);
- np = tp;
- }
- else
- r = 0;
-
- d = inv->d1;
- di = inv->di;
- while (--nn >= 0)
- {
- mp_limb_t q;
-
- gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
- if (qp)
- qp[nn] = q;
- }
- //if (tn)
- //gmp_free_limbs (tp, tn);
-
- return r >> inv->shift;
-}
-
-mp_limb_t
-mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b, r;
- a = ap[i]; b = bp[i];
- r = a + cy;
- cy = (r < cy);
- r += b;
- cy += (r < b);
- rp[i] = r;
- }
- return cy;
-}
-
-void
-mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- unsigned shift;
- mp_size_t i;
- mp_limb_t d1, d0, di, r1, r0;
-
- assert (nn >= 2);
- shift = inv->shift;
- d1 = inv->d1;
- d0 = inv->d0;
- di = inv->di;
-
- if (shift > 0)
- r1 = mpn_lshift (np, np, nn, shift);
- else
- r1 = 0;
-
- r0 = np[nn - 1];
-
- i = nn - 2;
- do
- {
- mp_limb_t n0, q;
- n0 = np[i];
- gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- if (shift > 0)
- {
- assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
- r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
- r1 >>= shift;
- }
-
- np[1] = r1;
- np[0] = r0;
-}
-
-mp_limb_t
-mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl - lpl;
- cl += lpl > rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-void
-mpn_div_qr_pi1 (mp_ptr qp,
- mp_ptr np, mp_size_t nn, mp_limb_t n1,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_size_t i;
-
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
-
- assert (dn > 2);
- assert (nn >= dn);
-
- d1 = dp[dn - 1];
- d0 = dp[dn - 2];
-
- assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
- /* Iteration variable is the index of the q limb.
- *
- * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
- * by <d1, d0, dp[dn-3], ..., dp[0] >
- */
-
- i = nn - dn;
- do
- {
- mp_limb_t n0 = np[dn-1+i];
-
- if (n1 == d1 && n0 == d0)
- {
- q = GMP_LIMB_MAX;
- mpn_submul_1 (np+i, dp, dn, q);
- n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
-
- cy = mpn_submul_1 (np + i, dp, dn-2, q);
-
- cy1 = n0 < cy;
- n0 = n0 - cy;
- cy = n1 < cy1;
- n1 = n1 - cy1;
- np[dn-2+i] = n0;
-
- if (cy != 0)
- {
- n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
- q--;
- }
- }
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- np[dn - 1] = n1;
-}
-
-void
-mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- const struct gmp_div_inverse *inv)
-{
- assert (dn > 0);
- assert (nn >= dn);
-
- if (dn == 1)
- np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
- else if (dn == 2)
- mpn_div_qr_2_preinv (qp, np, nn, inv);
- else
- {
- mp_limb_t nh;
- unsigned shift;
-
- assert (inv->d1 == dp[dn-1]);
- assert (inv->d0 == dp[dn-2]);
- assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
-
- shift = inv->shift;
- if (shift > 0)
- nh = mpn_lshift (np, np, nn, shift);
- else
- nh = 0;
-
- mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);
-
- if (shift > 0)
- gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
- }
-}
-
-void
-mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
-{
- mpz_t tr;
- mpz_t base;
- mp_size_t en, mn;
- mp_srcptr mp;
- struct gmp_div_inverse minv;
- unsigned shift;
- //mp_ptr tp = NULL;
- mpz_t tp;
-
- //mpz_init(tp);
-
- en = GMP_ABS (e->_mp_size);
- mn = GMP_ABS (m->_mp_size);
- if (mn == 0) {}
- //gmp_die ("mpz_powm: Zero modulo.");
-
- if (en == 0)
- {
- mpz_set_ui (r, mpz_cmpabs_ui (m, 1));
- return;
- }
-
- mp = m->_mp_d;
- mpn_div_qr_invert (&minv, mp, mn);
- shift = minv.shift;
-
- if (shift > 0)
- {
- /* To avoid shifts, we do all our reductions, except the final
- one, using a *normalized* m. */
- minv.shift = 0;
-
- // tp = gmp_alloc_limbs (mn);
- gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift));
- mp = tp->_mp_d;
- }
-
- mpz_init (base);
-
- if (e->_mp_size < 0)
- {
- if (!mpz_invert (base, b, m)) {}
- //gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
- }
- else
- {
- mp_size_t bn;
- mpz_abs (base, b);
-
- bn = base->_mp_size;
- if (bn >= mn)
- {
- mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
- bn = mn;
- }
-
- /* We have reduced the absolute value. Now take care of the
- sign. Note that we get zero represented non-canonically as
- m. */
- if (b->_mp_size < 0)
- {
- mp_ptr bp = MPZ_REALLOC (base, mn);
- gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
- bn = mn;
- }
- base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
- }
- mpz_init_set_ui (tr, 1);
-
- while (--en >= 0)
- {
- mp_limb_t w = e->_mp_d[en];
- mp_limb_t bit;
-
- bit = GMP_LIMB_HIGHBIT;
- do
- {
- mpz_mul (tr, tr, tr);
- if (w & bit)
- mpz_mul (tr, tr, base);
- if (tr->_mp_size > mn)
- {
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- bit >>= 1;
- }
- while (bit > 0);
- }
-
- /* Final reduction */
- if (tr->_mp_size >= mn)
- {
- minv.shift = shift;
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- //if (tp)
- //gmp_free_limbs (tp, mn);
-
- mpz_swap (r, tr);
- mpz_clear (tr);
- mpz_clear (base);
-}
-
-int
-mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- if (an != bn)
- return an < bn ? -1 : 1;
- else
- return mpn_cmp (ap, bp, an);
-}
-
-
-mp_size_t
-mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- int cmp;
- mp_ptr rp;
-
- cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
- if (cmp > 0)
- {
- rp = MPZ_REALLOC (r, an);
- gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
- return mpn_normalized_size (rp, an);
- }
- else if (cmp < 0)
- {
- rp = MPZ_REALLOC (r, bn);
- gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
- return -mpn_normalized_size (rp, bn);
- }
- else
- return 0;
-}
-
-mp_limb_t
-mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
- i = 0;
- do
- {
- mp_limb_t r = ap[i] + b;
- /* Carry out */
- b = (r < b);
- rp[i] = r;
- }
- while (++i < n);
-
- return b;
-}
-
-
-mp_limb_t
-mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_add_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-mp_size_t
-mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- mp_ptr rp;
- mp_limb_t cy;
-
- if (an < bn)
- {
- MPZ_SRCPTR_SWAP (a, b);
- MP_SIZE_T_SWAP (an, bn);
- }
-
- rp = MPZ_REALLOC (r, an + 1);
- cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
-
- rp[an] = cy;
-
- return an + cy;
-}
-
-void
-mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_sub (r, a, b);
- else
- rn = mpz_abs_add (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl + lpl;
- cl += lpl < rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-mp_limb_t
-mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-
-mp_limb_t
-mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
-{
- assert (un >= vn);
- assert (vn >= 1);
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));
-
- /* We first multiply by the low order limb. This result can be
- stored, not added, to rp. We also avoid a loop for zeroing this
- way. */
-
- rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
-
- /* Now accumulate the product of up[] and the next higher limb from
- vp[]. */
-
- while (--vn >= 1)
- {
- rp += 1, vp += 1;
- rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
- }
- return rp[un];
-}
-
-
-void
-mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- int sign;
- mp_size_t un, vn, rn;
- mpz_t t;
- mp_ptr tp;
-
- un = u->_mp_size;
- vn = v->_mp_size;
-
- if (un == 0 || vn == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- sign = (un ^ vn) < 0;
-
- un = GMP_ABS (un);
- vn = GMP_ABS (vn);
-
- mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
-
- tp = t->_mp_d;
- if (un >= vn)
- mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
- else
- mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
-
- rn = un + vn;
- rn -= tp[rn-1] == 0;
-
- t->_mp_size = sign ? - rn : rn;
- mpz_swap (r, t);
- mpz_clear (t);
-}
-
-void
-mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- while (--n >= 0)
- d[n] = s[n];
-}
-
-void
-mpn_zero (mp_ptr rp, mp_size_t n)
-{
- while (--n >= 0)
- rp[n] = 0;
-}
-
-
-void
-mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
-{
- mp_size_t un, rn;
- mp_size_t limbs;
- unsigned shift;
- mp_ptr rp;
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- limbs = bits / GMP_LIMB_BITS;
- shift = bits % GMP_LIMB_BITS;
-
- rn = un + limbs + (shift > 0);
- rp = MPZ_REALLOC (r, rn);
- if (shift > 0)
- {
- mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
- rp[rn-1] = cy;
- rn -= (cy == 0);
- }
- else
- mpn_copyd (rp + limbs, u->_mp_d, un);
-
- mpn_zero (rp, limbs);
-
- r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
-}
-
-int
-mpn_zero_p(mp_srcptr rp, mp_size_t n)
-{
- return mpn_normalized_size (rp, n) == 0;
-}
-
-
-void
-mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode)
-{
- mp_size_t un, qn;
- mp_size_t limb_cnt;
- mp_ptr qp;
- int adjust;
-
- un = u->_mp_size;
- if (un == 0)
- {
- q->_mp_size = 0;
- return;
- }
- limb_cnt = bit_index / GMP_LIMB_BITS;
- qn = GMP_ABS (un) - limb_cnt;
- bit_index %= GMP_LIMB_BITS;
-
- if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
- /* Note: Below, the final indexing at limb_cnt is valid because at
- that point we have qn > 0. */
- adjust = (qn <= 0
- || !mpn_zero_p (u->_mp_d, limb_cnt)
- || (u->_mp_d[limb_cnt]
- & (((mp_limb_t) 1 << bit_index) - 1)));
- else
- adjust = 0;
-
- if (qn <= 0)
- qn = 0;
- else
- {
- qp = MPZ_REALLOC (q, qn);
-
- if (bit_index != 0)
- {
- mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
- qn -= qp[qn - 1] == 0;
- }
- else
- {
- mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
- }
- }
-
- q->_mp_size = qn;
-
- if (adjust)
- mpz_add_ui (q, q, 1);
- if (un < 0)
- mpz_neg (q, q);
-}
-
-void
-mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
-}
-
-int
-mpz_cmp (const mpz_t a, const mpz_t b)
-{
- mp_size_t asize = a->_mp_size;
- mp_size_t bsize = b->_mp_size;
-
- if (asize != bsize)
- return (asize < bsize) ? -1 : 1;
- else if (asize >= 0)
- return mpn_cmp (a->_mp_d, b->_mp_d, asize);
- else
- return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
-}
-
-void
-mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_add (r, a, b);
- else
- rn = mpz_abs_sub (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-
-int
-mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t limb_index;
- unsigned shift;
- mp_size_t ds;
- mp_size_t dn;
- mp_limb_t w;
- int bit;
-
- ds = d->_mp_size;
- dn = GMP_ABS (ds);
- limb_index = bit_index / GMP_LIMB_BITS;
- if (limb_index >= dn)
- return ds < 0;
-
- shift = bit_index % GMP_LIMB_BITS;
- w = d->_mp_d[limb_index];
- bit = (w >> shift) & 1;
-
- if (ds < 0)
- {
- /* d < 0. Check if any of the bits below is set: If so, our bit
- must be complemented. */
- if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
- return bit ^ 1;
- while (--limb_index >= 0)
- if (d->_mp_d[limb_index] > 0)
- return bit ^ 1;
- }
- return bit;
-}
-
-mp_bitcnt_t
-mpn_limb_size_in_base_2 (mp_limb_t u)
-{
- unsigned shift;
-
- assert (u > 0);
- gmp_clz (shift, u);
- return GMP_LIMB_BITS - shift;
-}
-
-size_t
-mpz_sizeinbase (const mpz_t u, int base)
-{
- mp_size_t un, tn;
- mp_srcptr up;
- //mp_ptr tp;
- mpz_t tp;
-
- mp_bitcnt_t bits;
- struct gmp_div_inverse bi;
- size_t ndigits;
-
- mpz_init(tp);
-
- assert (base >= 2);
- assert (base <= 62);
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- return 1;
-
- up = u->_mp_d;
-
- bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
- switch (base)
- {
- case 2:
- return bits;
- case 4:
- return (bits + 1) / 2;
- case 8:
- return (bits + 2) / 3;
- case 16:
- return (bits + 3) / 4;
- case 32:
- return (bits + 4) / 5;
- /* FIXME: Do something more clever for the common case of base
- 10. */
- }
-
- //tp = gmp_alloc_limbs (un);
-
- mpn_copyi (tp->_mp_d, up, un);
- mpn_div_qr_1_invert (&bi, base);
-
- tn = un;
- ndigits = 0;
- do
- {
- ndigits++;
- mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi);
- tn -= (tp->_mp_d[tn-1] == 0);
- }
- while (tn > 0);
-
- //gmp_free_limbs (tp, un);
- return ndigits;
-}
-
-int
-mpz_sgn (const mpz_t u)
-{
- return GMP_CMP (u->_mp_size, 0);
-}
-
-mp_bitcnt_t
-mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
- mp_limb_t ux)
-{
- unsigned cnt;
-
- assert (ux == 0 || ux == GMP_LIMB_MAX);
- assert (0 <= i && i <= un );
-
- while (limb == 0)
- {
- i++;
- if (i == un)
- return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
- limb = ux ^ up[i];
- }
- gmp_ctz (cnt, limb);
- return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
-}
-
-void
-mpz_abs (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = GMP_ABS (r->_mp_size);
-}
-
-
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
-{
- mp_size_t i;
- i = bit / GMP_LIMB_BITS;
-
- return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
- i, ptr, i, 0);
-}
-
-mp_bitcnt_t
-mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit)
-{
- mp_ptr up;
- mp_size_t us, un, i;
- mp_limb_t limb, ux;
-
- us = u->_mp_size;
- un = GMP_ABS (us);
- i = starting_bit / GMP_LIMB_BITS;
-
- /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
- for u<0. Notice this test picks up any u==0 too. */
- if (i >= un)
- return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
-
- up = u->_mp_d;
- ux = 0;
- limb = up[i];
-
- if (starting_bit != 0)
- {
- if (us < 0)
- {
- ux = mpn_zero_p (up, i);
- limb = ~ limb + ux;
- ux = - (mp_limb_t) (limb >= ux);
- }
-
- /* Mask to 0 all bits before starting_bit, thus ignoring them. */
- limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
- }
-
- return mpn_common_scan (limb, i, up, un, ux);
-}
-
-
-mp_bitcnt_t
-mpz_make_odd (mpz_t r)
-{
- mp_bitcnt_t shift;
-
- assert (r->_mp_size > 0);
- /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
- shift = mpn_scan1 (r->_mp_d, 0);
- mpz_tdiv_q_2exp (r, r, shift);
-
- return shift;
-}
-
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
-}
-
-void
-mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_limb_t bit;
- mp_ptr dp;
-
- dn = GMP_ABS (d->_mp_size);
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- if (limb_index >= dn)
- {
- mp_size_t i;
- /* The bit should be set outside of the end of the number.
- We have to increase the size of the number. */
- dp = MPZ_REALLOC (d, limb_index + 1);
-
- dp[limb_index] = bit;
- for (i = dn; i < limb_index; i++)
- dp[i] = 0;
- dn = limb_index + 1;
- }
- else
- {
- mp_limb_t cy;
-
- dp = d->_mp_d;
-
- cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
- if (cy > 0)
- {
- dp = MPZ_REALLOC (d, dn + 1);
- dp[dn++] = cy;
- }
- }
-
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-void
-mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_ptr dp;
- mp_limb_t bit;
-
- dn = GMP_ABS (d->_mp_size);
- dp = d->_mp_d;
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- assert (limb_index < dn);
-
- gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
- dn - limb_index, bit));
- dn = mpn_normalized_size (dp, dn);
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-void
-mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
-{
- if (!mpz_tstbit (d, bit_index))
- {
- if (d->_mp_size >= 0)
- mpz_abs_add_bit (d, bit_index);
- else
- mpz_abs_sub_bit (d, bit_index);
- }
-}
-
-void
-mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
-{
- gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
-}
-
-#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
-#define mpz_even_p(z) (! mpz_odd_p (z))
-
-int
-mpz_cmpabs (const mpz_t u, const mpz_t v)
-{
- return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
- v->_mp_d, GMP_ABS (v->_mp_size));
-}
-
-void
-mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
-{
- mpz_t tu, tv, s0, s1, t0, t1;
- mp_bitcnt_t uz, vz, gz;
- mp_bitcnt_t power;
-
- if (u->_mp_size == 0)
- {
- /* g = 0 u + sgn(v) v */
- signed long sign = mpz_sgn (v);
- mpz_abs (g, v);
- if (s)
- s->_mp_size = 0;
- if (t)
- mpz_set_si (t, sign);
- return;
- }
-
- if (v->_mp_size == 0)
- {
- /* g = sgn(u) u + 0 v */
- signed long sign = mpz_sgn (u);
- mpz_abs (g, u);
- if (s)
- mpz_set_si (s, sign);
- if (t)
- t->_mp_size = 0;
- return;
- }
-
- mpz_init (tu);
- mpz_init (tv);
- mpz_init (s0);
- mpz_init (s1);
- mpz_init (t0);
- mpz_init (t1);
-
- mpz_abs (tu, u);
- uz = mpz_make_odd (tu);
- mpz_abs (tv, v);
- vz = mpz_make_odd (tv);
- gz = GMP_MIN (uz, vz);
-
- uz -= gz;
- vz -= gz;
-
- /* Cofactors corresponding to odd gcd. gz handled later. */
- if (tu->_mp_size < tv->_mp_size)
- {
- mpz_swap (tu, tv);
- MPZ_SRCPTR_SWAP (u, v);
- MPZ_PTR_SWAP (s, t);
- MP_BITCNT_T_SWAP (uz, vz);
- }
-
- /* Maintain
- *
- * u = t0 tu + t1 tv
- * v = s0 tu + s1 tv
- *
- * where u and v denote the inputs with common factors of two
- * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
- *
- * 2^p tu = s1 u - t1 v
- * 2^p tv = -s0 u + t0 v
- */
-
- /* After initial division, tu = q tv + tu', we have
- *
- * u = 2^uz (tu' + q tv)
- * v = 2^vz tv
- *
- * or
- *
- * t0 = 2^uz, t1 = 2^uz q
- * s0 = 0, s1 = 2^vz
- */
-
- mpz_tdiv_qr (t1, tu, tu, tv);
- mpz_mul_2exp (t1, t1, uz);
-
- mpz_setbit (s1, vz);
- power = uz + vz;
-
- if (tu->_mp_size > 0)
- {
- mp_bitcnt_t shift;
- shift = mpz_make_odd (tu);
- mpz_setbit (t0, uz + shift);
- power += shift;
-
- for (;;)
- {
- int c;
- c = mpz_cmp (tu, tv);
- if (c == 0)
- break;
-
- if (c < 0)
- {
- /* tv = tv' + tu
- *
- * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
- * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
-
- mpz_sub (tv, tv, tu);
- mpz_add (t0, t0, t1);
- mpz_add (s0, s0, s1);
-
- shift = mpz_make_odd (tv);
- mpz_mul_2exp (t1, t1, shift);
- mpz_mul_2exp (s1, s1, shift);
- }
- else
- {
- mpz_sub (tu, tu, tv);
- mpz_add (t1, t0, t1);
- mpz_add (s1, s0, s1);
-
- shift = mpz_make_odd (tu);
- mpz_mul_2exp (t0, t0, shift);
- mpz_mul_2exp (s0, s0, shift);
- }
- power += shift;
- }
- }
- else
- mpz_setbit (t0, uz);
-
- /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
- cofactors. */
-
- mpz_mul_2exp (tv, tv, gz);
- mpz_neg (s0, s0);
-
- /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
- adjust cofactors, we need u / g and v / g */
-
- mpz_divexact (s1, v, tv);
- mpz_abs (s1, s1);
- mpz_divexact (t1, u, tv);
- mpz_abs (t1, t1);
-
- while (power-- > 0)
- {
- /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
- if (mpz_odd_p (s0) || mpz_odd_p (t0))
- {
- mpz_sub (s0, s0, s1);
- mpz_add (t0, t0, t1);
- }
- //assert (mpz_even_p (t0) && mpz_even_p (s0));
- mpz_tdiv_q_2exp (s0, s0, 1);
- mpz_tdiv_q_2exp (t0, t0, 1);
- }
-
- /* Arrange so that |s| < |u| / 2g */
- mpz_add (s1, s0, s1);
- if (mpz_cmpabs (s0, s1) > 0)
- {
- mpz_swap (s0, s1);
- mpz_sub (t0, t0, t1);
- }
- if (u->_mp_size < 0)
- mpz_neg (s0, s0);
- if (v->_mp_size < 0)
- mpz_neg (t0, t0);
-
- mpz_swap (g, tv);
- if (s)
- mpz_swap (s, s0);
- if (t)
- mpz_swap (t, t0);
-
- mpz_clear (tu);
- mpz_clear (tv);
- mpz_clear (s0);
- mpz_clear (s1);
- mpz_clear (t0);
- mpz_clear (t1);
-}
-
-
-void
-mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
-{
- mpz_t t;
- mpz_init_set_ui (t, v);
- mpz_mul (t, u, t);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-
-// STRING CONVERSION
-
-unsigned
-mpn_base_power_of_two_p (unsigned b)
-{
- switch (b)
- {
- case 2: return 1;
- case 4: return 2;
- case 8: return 3;
- case 16: return 4;
- case 32: return 5;
- case 64: return 6;
- case 128: return 7;
- case 256: return 8;
- default: return 0;
- }
-}
-
-
-
-void
-mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
-{
- mp_limb_t m;
- mp_limb_t p;
- unsigned exp;
-
- m = GMP_LIMB_MAX / b;
- for (exp = 1, p = b; p <= m; exp++)
- p *= b;
-
- info->exp = exp;
- info->bb = p;
-}
-
-int isspace_gpu(unsigned char c) {
- if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v')
- return 1;
- return 0;
-}
-
-int strlen_c(const char *c) {
-
- // rather naive implementation – we assume a string is terminated, and is not 0 characters long.
-
- int i = 0;
- while (1) {
- if (c[i] == '\0')
- return i;
- i++;
- }
- return i;
-}
-
-mp_size_t
-mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
- unsigned bits)
-{
- mp_size_t rn;
- mp_limb_t limb;
- unsigned shift;
-
- for (limb = 0, rn = 0, shift = 0; sn-- > 0; )
- {
- limb |= (mp_limb_t) sp[sn] << shift;
- shift += bits;
- if (shift >= GMP_LIMB_BITS)
- {
- shift -= GMP_LIMB_BITS;
- rp[rn++] = limb;
- /* Next line is correct also if shift == 0,
- bits == 8, and mp_limb_t == unsigned char. */
- limb = (unsigned int) sp[sn] >> (bits - shift);
- }
- }
- if (limb != 0)
- rp[rn++] = limb;
- else
- rn = mpn_normalized_size (rp, rn);
- return rn;
-}
-
-mp_size_t
-mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
- mp_limb_t b, const struct mpn_base_info *info)
-{
- mp_size_t rn;
- mp_limb_t w;
- unsigned k;
- size_t j;
-
- assert (sn > 0);
-
- k = 1 + (sn - 1) % info->exp;
-
- j = 0;
- w = sp[j++];
- while (--k != 0)
- w = w * b + sp[j++];
-
- rp[0] = w;
-
- for (rn = 1; j < sn;)
- {
- mp_limb_t cy;
-
- w = sp[j++];
- for (k = 1; k < info->exp; k++)
- w = w * b + sp[j++];
-
- cy = mpn_mul_1 (rp, rp, rn, info->bb);
- cy += mpn_add_1 (rp, rp, rn, w);
- if (cy > 0)
- rp[rn++] = cy;
- }
- assert (j == sn);
-
- return rn;
-}
-
-
-int
-mpz_set_str (mpz_t r, const char *sp, int base)
-{
- unsigned bits, value_of_a;
- mp_size_t rn, alloc;
- mp_ptr rp;
- size_t dn, sn;
- int sign;
- unsigned char dp[4096];
-
- assert (base == 0 || (base >= 2 && base <= 62));
-
- while (isspace_gpu( (unsigned char) *sp))
- sp++;
-
- sign = (*sp == '-');
- sp += sign;
-
- if (base == 0)
- {
- if (sp[0] == '0')
- {
- if (sp[1] == 'x' || sp[1] == 'X')
- {
- base = 16;
- sp += 2;
- }
- else if (sp[1] == 'b' || sp[1] == 'B')
- {
- base = 2;
- sp += 2;
- }
- else
- base = 8;
- }
- else
- base = 10;
- }
-
- if (!*sp)
- {
- r->_mp_size = 0;
- return -1;
- }
- sn = strlen_c(sp);
- //dp = (unsigned char *) gmp_alloc (sn);
-
-
- value_of_a = (base > 36) ? 36 : 10;
- for (dn = 0; *sp; sp++)
- {
- unsigned digit;
-
- if (isspace_gpu ((unsigned char) *sp))
- continue;
- else if (*sp >= '0' && *sp <= '9')
- digit = *sp - '0';
- else if (*sp >= 'a' && *sp <= 'z')
- digit = *sp - 'a' + value_of_a;
- else if (*sp >= 'A' && *sp <= 'Z')
- digit = *sp - 'A' + 10;
- else
- digit = base; /* fail */
-
- if (digit >= (unsigned) base)
- {
- //gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
-
- dp[dn++] = digit;
- }
-
- if (!dn)
- {
- //gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
- bits = mpn_base_power_of_two_p (base);
-
- if (bits > 0)
- {
- alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_bits (rp, dp, dn, bits);
- }
- else
- {
- struct mpn_base_info info;
- mpn_get_base_info (&info, base);
- alloc = (dn + info.exp - 1) / info.exp;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_other (rp, dp, dn, base, &info);
- /* Normalization, needed for all-zero input. */
- assert (rn > 0);
- rn -= rp[rn-1] == 0;
- }
- assert (rn <= alloc);
- //gmp_free (dp, sn);
-
- r->_mp_size = sign ? - rn : rn;
-
- return 0;
-}
-
-
-
-int
-mpz_init_set_str (mpz_t r, const char *sp, int base)
-{
- mpz_init (r);
- return mpz_set_str (r, sp, base);
-}
-
-
-// Montgomery multiplication
-
-void mont_prepare(mpz_t b, mpz_t e, mpz_t m,
- mpz_t r, mpz_t r_1,
- mpz_t ni, mpz_t M, mpz_t x
- );
-
-void mont_product(mpz_t ret,
- const mpz_t a, const mpz_t b,
- const mpz_t r, const mpz_t r_1,
- const mpz_t n, const mpz_t ni
- );
-
-void mont_modexp(mpz_t ret,
- mpz_t a, mpz_t e,
- const mpz_t M,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- );
-
-void mont_finish(mpz_t ret,
- const mpz_t xx,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- );
-
-void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj);
-
-void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod);
-
-
-
-
-void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) {
-
- mpz_t two; // powj == 2^j
-
- mpz_init_set_ui(two, 2);
-
- mp_bitcnt_t j = mpz_scan1(m, 0);
-
- mpz_tdiv_q_2exp(q,m,j);
- mpz_mul_2exp(powj,two,j - 1);
-
- mpz_clear(two);
-
-}
-
-// CPU
-void mont_prepare(mpz_t b, mpz_t e, mpz_t m,
- mpz_t r, mpz_t r_1,
- mpz_t ni, mpz_t M, mpz_t x) {
-
- // MARK: break this up, reduce the amount of temporary variables
-
- // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd)
-
- // calculate r, which must be larger than the modulo and also a power of 2
-
- mpz_t one, oo; // some helper variables
- mpz_init_set_si(one,1);
- mpz_init_set_si(oo,0);
-
- size_t len = mpz_sizeinbase(m,2);
-
- mpz_mul_2exp(r,one,len);
-
- mpz_set_si(one, 0);
-
-
- mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni
-
- int sgn = mpz_sgn(r_1);
-
- mpz_abs(r_1, r_1);
- mpz_abs(ni, ni);
-
- if (sgn == -1) {
- mpz_sub(ni, r, ni);
- mpz_sub(r_1, m, r_1);
- }
-
- if (mpz_cmp_ui(one, 1))
- assert(0);
-
- mpz_mul(one, r, r_1);
- mpz_mul(oo,ni,m);
-
- mpz_sub(one, one, oo); // oo must be one
-
- if (mpz_cmp_ui(one, 1))
- assert(0);
-
- mpz_mul(M, b, r);
- mpz_mod(M, M, m); // set M
-
- mpz_mod(x, r, m); // set x
-
- mpz_clear(one);
- mpz_clear(oo);
-
-}
-
-// maybe GPU?
-// MARK: n MUST be an odd number
-void mont_modexp(mpz_t ret,
- mpz_t a, mpz_t e,
- const mpz_t M,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- ) {
-
- mpz_t aa,xx;
-
- mpz_init_set(aa, M);
- mpz_init_set(xx, a);
-
- int k = (int)mpz_sizeinbase(e,2);
-
- for (int i = k - 1; i >= 0; i--) {
-
- mont_product(xx, xx, xx, r, r_1, n, ni);
-
- if (mpz_tstbit(e, i))
- mont_product(xx, aa, xx, r, r_1, n, ni);
-
- }
-
- mpz_set(ret, xx);
-
- mpz_clear(aa);
- mpz_clear(xx);
-
-}
-
-void mont_finish(mpz_t ret,
- const mpz_t xx,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- ) {
-
-
- mpz_t x,one;
-
- mpz_init(x);
- mpz_init_set_ui(one, 1);
-
- mont_product(x, xx, one, r, r_1, n, ni);
-
- mpz_set(ret, x);
-
- mpz_clear(x);
- mpz_clear(one);
-
-}
-
-
-// GPU
-void mont_product(mpz_t ret,
- const mpz_t a, const mpz_t b,
- const mpz_t r, const mpz_t r_1,
- const mpz_t n, const mpz_t ni
- ) {
-
- mpz_t t,m,u;
-
- mpz_init(t);
- mpz_init(m);
- mpz_init(u);
-
-
-
- mont_mulmod(t, b, a, r);
-
- mont_mulmod(m, ni, t, r);
-
- mpz_t ab,mn;
-
- mpz_init(ab);
- mpz_init(mn);
-
- mpz_mul(ab, a, b);
- mpz_mul(mn, m, n);
-
- mpz_add(ab, ab, mn);
-
- unsigned long sz = mpz_sizeinbase(r,2) - 1;
- mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division
-
- if (mpz_cmp(u, n) >= 0)
- mpz_sub(u, u, n);
-
- mpz_set(ret, u);
-
- mpz_clear(ab);
- mpz_clear(mn);
- mpz_clear(t);
- mpz_clear(m);
- mpz_clear(u);
-
-}
-
-// not the fastest... but it does not increase the variable sizes
-void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) {
-
- mpz_t aa, bb;
- mpz_init_set(aa, a);
- mpz_init_set(bb,b);
-
- mpz_mod(aa, aa, mod); // in case a is bigger
-
- while (mpz_cmp_ui(bb, 0) > 0) {
- if (mpz_odd_p(bb)) {
- mpz_add(res, res, aa);
- mpz_mod(res, res, mod);
- }
-
- mpz_mul_2exp(aa,aa,1);
- mpz_mod(aa, aa, mod);
- mpz_tdiv_q_2exp(bb, bb, 1);
- }
-}
-
-
-
-void montgomery(const char *signature,
- const char *exponent,
- const char *modulus,
- const char *base,
- unsigned long *valid)
-{
-
-
- int radix = 16;
-
- mpz_t b,e,m,res;
-
-
-
- mpz_init(res);
-
- mpz_init_set_str(b,base,radix); // M
- mpz_init_set_str(e,exponent,radix);
- mpz_init_set_str(m,modulus,radix); // n
-
- mpz_t r, r_1, ni, M, x;
- mpz_init(r); // MARK: I think I have to destroy these myself
- mpz_init(r_1);
- mpz_init(ni);
- mpz_init(M);
- mpz_init(x);
-
-
- mpz_t xx;
- mpz_init(xx);
-
-
-
-
- if (mpz_even_p(m)) {
-
- mpz_t bb, x1, x2, q, powj;
- mpz_init(bb);
- mpz_init(x1);
- mpz_init(x2);
- mpz_init(q);
- mpz_init(powj);
-
- mont_prepare_even_modulus(m, q, powj);
-
- // q is uneven, so we can use regular modexp
- // MARK: we can improve the efficiency here by doing simple reductions
-
- mpz_mod(bb, b, q); // reductions like this
-
- mont_prepare(bb, e, q, r, r_1, ni, M, x);
- mont_modexp(xx, x, e, M, q, ni, r, r_1);
- mont_finish(x1, xx, q, ni, r, r_1);
-
-
- // MARK: we can also reduce and really speed this up as well -> binary method?
- mpz_powm(x2, b, e, powj);
-
- mpz_t y, q_1;
- mpz_init(y);
- mpz_init(q_1);
-
- mpz_sub(y, x2, x1);
-
- mpz_invert(q_1, q, powj);
-
- mpz_mul(y, y, q_1);
- mpz_mod(y, y, powj);
-
- mpz_addmul(x1, q, y);
-
- mpz_set(res, x1);
-
-
-
- } else {
-
- mont_prepare(b, e, m, r, r_1, ni, M, x);
-
- mont_modexp(xx, x, e, M, m, ni, r, r_1);
-
- mont_finish(res, xx, m, ni, r, r_1);
-
-
- }
-
-
-
-
-
-
-
- mpz_t sig;
- mpz_init_set_str(sig,signature,radix);
-
- if (mpz_cmp(sig,res) == 0) {
-
- *valid = 1;
-
- } else {
-
- }
-
-
-}
diff --git a/source/gmp_GPU.h b/source/gmp_GPU.h
@@ -1,27 +0,0 @@
-//
-// gmp_GPU.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 25.11.2023.
-//
-
-#ifndef gmp_GPU_h
-#define gmp_GPU_h
-
-//#include <stdio.h>
-
-//#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-void montgomery(const char *signature,
- const char *exponent,
- const char *modulus,
- const char *base,
- unsigned long *valid);
-
-#endif /* gmp_GPU_h */
diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c
@@ -7,23 +7,75 @@
#include "montgomery.h"
+#include "time.h"
+
int main(int argc, char** argv)
{
+ mont_rsa_tests();
+
mpz_t res;
mpz_init(res);
//mont_go(res,"13", "0F", "C7",16);
- //mont_go(res, "5BD6158BDE0AC0655B6FCEA57994011D18B6B3C9E5FF75C45FC1E5EC2C1F26D6AB8547A17C0BC15D40F4346CFE74CF4EB417E6850D45C3B49E9389DAF400BC5E5B3F5D8E1E45A23DD042A87E82703209F9EA9808A002FEC00C96A5F0D9B7673B4B0A224438D81C0A9CEAD0DD22802B409230072768E73688D63EAB1C9BC242FAEDCFE0C8478B38254BAAC07AD6F82A27A0C3893FCB604BB57158F9125027AECC91D55B364B5C2BB9FE07FB6AB69F5A65112A2B7D5A805CA9B2C1CB75D315DE345BA68100DD5E46FA3BA54B614C298E60EBAF95CEC738DA2513736ECE051D153CECAC29F4A432A5FEB287E2A1B8C4640C58FF9E9E7DB6889E4865D1F1C8CF4E47", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16);
+ struct timespec t1, t2;
+
+ clock_gettime(CLOCK_REALTIME, &t1);
+
+ mont_go(res, "5BD6158BDE0AC0655B6FCEA57994011D18B6B3C9E5FF75C45FC1E5EC2C1F26D6AB8547A17C0BC15D40F4346CFE74CF4EB417E6850D45C3B49E9389DAF400BC5E5B3F5D8E1E45A23DD042A87E82703209F9EA9808A002FEC00C96A5F0D9B7673B4B0A224438D81C0A9CEAD0DD22802B409230072768E73688D63EAB1C9BC242FAEDCFE0C8478B38254BAAC07AD6F82A27A0C3893FCB604BB57158F9125027AECC91D55B364B5C2BB9FE07FB6AB69F5A65112A2B7D5A805CA9B2C1CB75D315DE345BA68100DD5E46FA3BA54B614C298E60EBAF95CEC738DA2513736ECE051D153CECAC29F4A432A5FEB287E2A1B8C4640C58FF9E9E7DB6889E4865D1F1C8CF4E47", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16);
//mont_go(res, "13", "05", "31",10);
- //char str[2048];
- //mpz_get_str(str, 16, res); // result is base 10!
+ char str[2048];
+ mpz_get_str(str, 16, res); // result is base 10!
+
+ printf("%s\n",str);
+
+ clock_gettime(CLOCK_REALTIME, &t2);
+
+
+ printf("\nCPU verification (with montgomery) took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000);
+
+
+
+
+ char *template = "(genkey(rsa(nbits 4:2048)))";
+ gcry_sexp_t parms;
+
+ gcry_sexp_new(&parms, template, strlen(template), 1);
+
+
+ gcry_sexp_t key;
+
+ gcry_pk_genkey(&key,parms);
+
+ char *val = "1234567890ABCDEF"; // MARK: try random values as well
+ gcry_mpi_t m_mpi = gcry_mpi_new((int)strlen(val) * 8);
+ size_t scanned = 0;
+
+ gcry_mpi_scan(&m_mpi, GCRYMPI_FMT_HEX, val, 0, &scanned);
+
+ gcry_sexp_t toSign;
+ size_t errOff = 0;
+ char *dataformat = "(data (flags raw) (value %m))";
+
+ gcry_sexp_build(&toSign,&errOff,dataformat,m_mpi);
+
+ gcry_sexp_t resSign;
+
+ gcry_pk_sign(&resSign, toSign, key);
+
+ clock_gettime(CLOCK_REALTIME, &t1);
+
+ int gkvy = gcry_pk_verify(resSign, toSign, key);
- //printf("%s\n",str);
+ clock_gettime(CLOCK_REALTIME, &t2);
+
+ if (gkvy == 0) {
+ printf("\ngcry verification took %ld micro seconds\n", (t2.tv_nsec - t1.tv_nsec) / 1000);
+ }
//mont_go(res, "00956E3E7B09F7FECEF26CA44FFD69F19DC8DB6C3A29A707C2CDAD56994A58D6ACB8B275678D0D8670D3C716AC5C98398C8067943C7292F787F5451E8202F4C8BAEFA6CA787BC79B73A99CC4C85743EC7320E17195D560A380356A9D32AA81EF276A9DE8B9F6728647851AAD0090A458FB928BCE86884BD7CC7AC3CF226CE546E596135A948B820E1865D6A3395DF2BD5EB26FE5259B2B950CC61F887C0D5A81F77549D8F792D32552870358EC5B2B45552C35829D732CC1A08898FD2FFDFF5EBFE0BEE7D5702FCA240B377BFE7D2821E123F2A146725D01A5CF0A6C89FB7E73CA6F3B8640C44B0FA1A51B429BB3D4668495F20A25FB4185831C3B479C5041713C", "010001", "00BB5175E55C2F1BBAE52B0C1225F43385FF54B3BFEA88B42B21044328815B8742E303C843ABE76D147861AE92D563592EFD748BF2E5BE4D76793FB32FCF6B38F755D408D114C9DF89B3FAA77EDF0C9358AC3BC23C90CDAA8337927A3530DCF2AD6EFC023C96A7932F8A7935B9B3F5C84668B41FB39059A1B723A40D59A7B1BD03F56933D641409F2A49E614BBAA9F2573ED24899840585B73329A01071793332BA92A0C9033D7004B45FD01C3A850125FA2E4A40818F8E233B7B7595ABAB04B84AE88E4F7B516359EAB7C285F399A3EFF467113DDBDB17981F2F4F2DE405BA18863046570C1621AD9446CE8A3884893CEF50933CB60053B6862E2443CC8554121", 16);
@@ -39,7 +91,7 @@ int main(int argc, char** argv)
// montgomery_test();
- mont_rsa_tests();
+
diff --git a/source/rsa-test.c b/source/rsa-test.c
@@ -21,7 +21,7 @@
//#include "gmp.h"
- #include "gmp_GPU.h"
+ //#include "gmp_GPU.h"
//
//#include "RSA-Montgomery.h"
@@ -641,12 +641,6 @@ int rsa_tests(void) {
printf("--");
-/*
- generate_random_pairs(q, u,
- r, v,
- s, w,
- t, x, gen_n_pairs);
-*/
unsigned long result = 0;
@@ -680,10 +674,10 @@ int rsa_tests(void) {
}
// returns how many public keys were read – either 1 or n
-int mont_pairs_from_files(char *bases,
- char *exponents,
- char *moduli,
- char *signatures,
+int mont_pairs_from_files(char *bases, unsigned long *b_off,
+ char *exponents, unsigned long *e_off,
+ char *moduli, unsigned long *m_off,
+ char *signatures, unsigned long *s_off,
unsigned int *n) {
FILE *pkfile;
@@ -700,23 +694,37 @@ int mont_pairs_from_files(char *bases,
int i = 0;
+ unsigned long b_offset = 0;
+ unsigned long e_offset = 0;
+ unsigned long m_offset = 0;
+ unsigned long s_offset = 0;
+
while (1) {
- char n_buf[2048];
+ char n_buf[2048]; // need to be 0
char e_buf[2048];
+ memset(n_buf, 0, 2048);
+ memset(e_buf, 0, 2048);
+
if (fscanf(pkfile, "%s %s ", n_buf,e_buf) == -1)
break;
unsigned long n_buf_len = strlen(n_buf);
unsigned long e_buf_len = strlen(e_buf);
- memcpy(moduli, n_buf, n_buf_len);
- memcpy(exponents, e_buf, e_buf_len);
+ memcpy(&moduli[m_offset], n_buf, n_buf_len);
+ memcpy(&exponents[e_offset], e_buf, e_buf_len);
+
+ m_off[i] = m_offset;
+ e_off[i] = e_offset;
+
+ m_offset += n_buf_len + 1;
+ e_offset += e_buf_len + 1;
i++;
- break; // testing with just one
+ // break; // testing with just one
}
int j = 0;
@@ -726,18 +734,27 @@ int mont_pairs_from_files(char *bases,
char m_buf[2048]; // temp storage, large enough
char s_buf[2048];
+ memset(m_buf, 0, 2048);
+ memset(s_buf, 0, 2048);
+
if (fscanf(msfile, "%s %s ", m_buf,s_buf) == -1)
break;
unsigned long m_buf_len = strlen(m_buf);
unsigned long s_buf_len = strlen(s_buf);
- memcpy(bases, m_buf, m_buf_len);
- memcpy(signatures, s_buf, s_buf_len);
+ memcpy(&bases[b_offset], m_buf, m_buf_len);
+ memcpy(&signatures[s_offset], s_buf, s_buf_len);
+
+ b_off[j] = b_offset;
+ s_off[j] = s_offset;
+
+ b_offset += m_buf_len + 1;
+ s_offset += s_buf_len + 1;
j++;
- break; // testing with just one
+ // break; // testing with just one
}
@@ -751,10 +768,10 @@ int mont_pairs_from_files(char *bases,
}
-int mont_verify_pairs_with_opencl(char *bases,
- char *exponents,
- char *moduli,
- char *signatures,
+int mont_verify_pairs_with_opencl(char *bases, unsigned long *b_off,
+ char *exponents, unsigned long *e_off,
+ char *moduli, unsigned long *m_off,
+ char *signatures, unsigned long *s_off,
const unsigned int n,
const unsigned int pks,
unsigned long *result) {
@@ -895,21 +912,13 @@ int mont_verify_pairs_with_opencl(char *bases,
if (err != CL_SUCCESS)
{
size_t len;
- char buffer[352323];
+ char buffer[4096];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
-
- size_t len;
- char buffer[3523];
-
- clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
- printf("%s\n", buffer);
-
-
// Create the compute kernel in the program we wish to run
//
@@ -927,43 +936,32 @@ int mont_verify_pairs_with_opencl(char *bases,
cl_mem mod_mem;
cl_mem comp_mem;
-// cl_mem sig_len;
-// cl_mem exp_len;
-// cl_mem mod_len;
-// cl_mem comp_len;
+ cl_mem sig_len;
+ cl_mem exp_len;
+ cl_mem mod_len;
+ cl_mem comp_len;
cl_mem valid; // needs to be a buffer because it goes out
unsigned long signature_is_valid = 0;
- size_t moduli_len = strlen(moduli) + 1;
- size_t exponents_len = strlen(exponents) + 1;
- size_t signatures_len = strlen(signatures) + 1;
- size_t bases_len = strlen(bases) + 1;
-
-
-
+ unsigned long moduli_len = m_off[pks-1] + strlen(&moduli[m_off[pks-1]]) + 1;
+ unsigned long exponents_len = e_off[pks-1] + strlen(&exponents[e_off[pks-1]]) + 1;
+ unsigned long signatures_len = s_off[n-1] + strlen(&signatures[s_off[n-1]]) + 1;
+ unsigned long bases_len = b_off[n-1] + strlen(&bases[b_off[n-1]]) + 1;
- if (pks == 1) {
- mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, moduli_len, NULL, NULL);
- exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, exponents_len, NULL, NULL);
- } else {
-
- }
+ mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, moduli_len, NULL, NULL);
+ exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, exponents_len, NULL, NULL);
sig_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, signatures_len , NULL, NULL);
comp_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, bases_len, NULL, NULL); // the base, to compare whether we get the same signature
-//
-// if (pks == 1) {
-// mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long), NULL, NULL);
-// exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long), NULL, NULL);
-// } else {
-//
-// }
-//
-// sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
-// comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
+
+ mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * pks, NULL, NULL);
+ exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * pks, NULL, NULL);
+
+ sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
+ comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
valid = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL);
@@ -978,19 +976,16 @@ int mont_verify_pairs_with_opencl(char *bases,
// Write our data set into the input array in device memory
//
err = clEnqueueWriteBuffer(commands, sig_mem, CL_TRUE, 0, signatures_len, signatures, 0, NULL, NULL);
-// err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_off, 0, NULL, NULL);
- if (pks == 1) {
- err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, exponents_len, exponents, 0, NULL, NULL);
-// err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0,sizeof(unsigned long), e_len, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, moduli_len, moduli, 0, NULL, NULL);
-// err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0,sizeof(unsigned long), m_len, 0, NULL, NULL);
- } else {
-
- }
+ err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, exponents_len, exponents, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0, sizeof(unsigned long) * pks, e_off, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, moduli_len, moduli, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0, sizeof(unsigned long) * pks, m_off, 0, NULL, NULL);
err |= clEnqueueWriteBuffer(commands, comp_mem, CL_TRUE, 0, bases_len, bases, 0, NULL, NULL);
-// err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_off, 0, NULL, NULL);
+
err |= clEnqueueWriteBuffer(commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
@@ -1002,21 +997,21 @@ int mont_verify_pairs_with_opencl(char *bases,
//
err = 0;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &sig_mem);
-// err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len);
- err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &exp_mem);
-// err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len);
- err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &mod_mem);
-// err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len);
- err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &comp_mem);
-// err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len);
- err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &valid);
- //err |= clSetKernelArg(kernel, 5, sizeof(unsigned int), &n);
- //err |= clSetKernelArg(kernel, 6, sizeof(unsigned int), &pks);
+ err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len);
+ err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &exp_mem);
+ err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len);
+ err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &mod_mem);
+ err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len);
+ err |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &comp_mem);
+ err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len);
+ err |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &valid);
+ err |= clSetKernelArg(kernel, 9, sizeof(unsigned int), &n);
+ err |= clSetKernelArg(kernel, 10, sizeof(unsigned int), &pks);
//err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
if (err != CL_SUCCESS)
{
- printf("RSA-Error: Failed to set kernel arguments! %d\n", err);
+ printf("Error: Failed to set kernel arguments! %d\n", err);
exit(1);
}
@@ -1042,6 +1037,11 @@ int mont_verify_pairs_with_opencl(char *bases,
printf("KERNEL IS EXECUTING...\n");
+ struct timespec t1, t2;
+
+ clock_gettime(CLOCK_REALTIME, &t1);
+
+
// Wait for the command commands to get serviced before reading back results
//
clFinish(commands);
@@ -1055,6 +1055,12 @@ int mont_verify_pairs_with_opencl(char *bases,
printf("Error: Failed to read output array! %d\n", err);
exit(1);
}
+
+ clock_gettime(CLOCK_REALTIME, &t2);
+
+
+
+ printf("\nGPU verification took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000);
*result = signature_is_valid;
@@ -1065,10 +1071,10 @@ int mont_verify_pairs_with_opencl(char *bases,
clReleaseMemObject(mod_mem);
clReleaseMemObject(sig_mem);
-// clReleaseMemObject(comp_len);
-// clReleaseMemObject(exp_len);
-// clReleaseMemObject(mod_len);
-// clReleaseMemObject(sig_len);
+ clReleaseMemObject(comp_len);
+ clReleaseMemObject(exp_len);
+ clReleaseMemObject(mod_len);
+ clReleaseMemObject(sig_len);
clReleaseProgram(program);
clReleaseKernel(kernel);
@@ -1084,47 +1090,37 @@ int mont_rsa_tests(void) {
setup_gcry();
- unsigned int pairs = 1;
-
+ unsigned int pairs = number_of_pairs();;
- int str_sz = (2048);
+ int str_sz = (2048) * pairs;
-
char *b = malloc(str_sz);
char *e = malloc(str_sz);
char *m = malloc(str_sz);
char *s = malloc(str_sz);
- unsigned int pks = pairs;
+ unsigned long *b_off = malloc(str_sz);
+ unsigned long *e_off = malloc(str_sz);
+ unsigned long *m_off = malloc(str_sz);
+ unsigned long *s_off = malloc(str_sz);
- pks = mont_pairs_from_files(b, e, m, s, &pairs);
+ unsigned int pks = mont_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off,
+ &pairs);
unsigned long result = 0;
- struct timespec t1, t2;
-
- clock_gettime(CLOCK_REALTIME, &t1);
-
-
-
-
- //montgomery(b, e, m, s, &result);
-
+
- mont_verify_pairs_with_opencl(s,e,m,b,
+ mont_verify_pairs_with_opencl(s, s_off, e, e_off, m, m_off, b, b_off,
pairs, pks, &result);
- clock_gettime(CLOCK_REALTIME, &t2);
-
- printf("VERIFICATION RESULT: %lu\n",result);
-
- printf("\nGPU verification took %ld ms\n", (t2.tv_nsec - t1.tv_nsec) / 1000000);
+ printf("VERIFICATION RESULT: %lu\n\n",result);
free(b);
@@ -1135,19 +1131,3 @@ int mont_rsa_tests(void) {
}
-
-/*
-static void show_sexp(const char *prefix, gcry_sexp_t a) {
- char *buf;
- size_t size;
-
- if (prefix)
- fputs(prefix, stderr);
- size = gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, NULL, 0);
- buf = gcry_xmalloc(size);
-
- gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, buf, size);
- fprintf(stderr, "%.*s", (int) size, buf);
- gcry_free(buf);
- }
-*/
diff --git a/xcode/.DS_Store b/xcode/.DS_Store
Binary files differ.
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj
@@ -9,7 +9,8 @@
/* Begin PBXBuildFile section */
6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A36F8882B0F938E00AB772D /* montgomery.cl */; };
6A8A795F2A89672700116D7D /* verify.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* verify.cl */; };
- 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */; };
+ 6A99B06E2B1293DA0004E4B7 /* gmp.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A7914CB2B0CF320001EDCC1 /* gmp.c */; };
+ 6A99B06F2B1297220004E4B7 /* montgomery.c in Sources */ = {isa = PBXBuildFile; fileRef = 6A7914CD2B0CF320001EDCC1 /* montgomery.c */; };
6AA38E5B2B0A97FC00E85243 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AA38E5A2B0A97FC00E85243 /* main.c */; };
6AD85E072AF71AD900662919 /* big-int-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487D2ADADF4500D58E08 /* big-int-test.c */; };
6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AD85E0B2AFA510C00662919 /* openssl-test.c */; };
@@ -48,8 +49,6 @@
6A7914CD2B0CF320001EDCC1 /* montgomery.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = montgomery.c; path = ../source/montgomery.c; sourceTree = "<group>"; };
6A7914CE2B0CF320001EDCC1 /* gmp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp.h; path = ../source/gmp.h; sourceTree = "<group>"; };
6A8A795E2A89672700116D7D /* verify.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = verify.cl; sourceTree = "<group>"; };
- 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gmp_GPU.h; path = ../source/gmp_GPU.h; sourceTree = "<group>"; };
- 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gmp_GPU.c; path = ../source/gmp_GPU.c; sourceTree = "<group>"; };
6AA38E582B0A97FC00E85243 /* lib-gpu-generate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-generate"; sourceTree = BUILT_PRODUCTS_DIR; };
6AA38E5A2B0A97FC00E85243 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
6AA38E612B0A9B2100E85243 /* lib-gpu-generate.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "lib-gpu-generate.entitlements"; sourceTree = "<group>"; };
@@ -138,8 +137,6 @@
6AF748852ADADFAD00D58E08 /* opencl-test.c */,
6A7914CB2B0CF320001EDCC1 /* gmp.c */,
6A7914CD2B0CF320001EDCC1 /* montgomery.c */,
- 6A99B06C2B1275760004E4B7 /* gmp_GPU.c */,
- 6A99B06B2B1275760004E4B7 /* gmp_GPU.h */,
);
name = Sources;
sourceTree = "<group>";
@@ -230,8 +227,9 @@
buildActionMask = 2147483647;
files = (
6AD85E0C2AFA510C00662919 /* openssl-test.c in Sources */,
- 6A99B06D2B1275760004E4B7 /* gmp_GPU.c in Sources */,
6AD85E072AF71AD900662919 /* big-int-test.c in Sources */,
+ 6A99B06F2B1297220004E4B7 /* montgomery.c in Sources */,
+ 6A99B06E2B1293DA0004E4B7 /* gmp.c in Sources */,
6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */,
6A8A795F2A89672700116D7D /* verify.cl in Sources */,
6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */,
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate
Binary files differ.
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
@@ -1930,8 +1930,8 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "657"
- endingLineNumber = "657"
+ startingLineNumber = "651"
+ endingLineNumber = "651"
landmarkName = "rsa_tests()"
landmarkType = "9">
</BreakpointContent>
@@ -2913,22 +2913,6 @@
<BreakpointProxy
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
- uuid = "A4D3CD3E-C63D-4684-A93A-EE22635022EE"
- shouldBeEnabled = "No"
- ignoreCount = "0"
- continueAfterRunningActions = "No"
- filePath = "../source/rsa-test.c"
- startingColumnNumber = "9223372036854775807"
- endingColumnNumber = "9223372036854775807"
- startingLineNumber = "703"
- endingLineNumber = "703"
- landmarkName = "mont_pairs_from_files(bases, exponents, moduli, signatures, n)"
- landmarkType = "9">
- </BreakpointContent>
- </BreakpointProxy>
- <BreakpointProxy
- BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
- <BreakpointContent
uuid = "4184BD65-645D-4022-94C1-79216BEF6823"
shouldBeEnabled = "No"
ignoreCount = "0"
@@ -2936,9 +2920,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "898"
- endingLineNumber = "898"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
+ startingLineNumber = "915"
+ endingLineNumber = "915"
+ landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -2954,7 +2938,7 @@
endingColumnNumber = "9223372036854775807"
startingLineNumber = "1052"
endingLineNumber = "1052"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
+ landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)"
landmarkType = "9">
<Locations>
<Location
@@ -3047,7 +3031,7 @@
endingColumnNumber = "9223372036854775807"
startingLineNumber = "1047"
endingLineNumber = "1047"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
+ landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)"
landmarkType = "9">
<Locations>
<Location
@@ -3116,38 +3100,6 @@
<BreakpointProxy
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
- uuid = "8BB8F280-7EFB-48F8-A762-C497F0136785"
- shouldBeEnabled = "No"
- ignoreCount = "0"
- continueAfterRunningActions = "No"
- filePath = "../source/rsa-test.c"
- startingColumnNumber = "9223372036854775807"
- endingColumnNumber = "9223372036854775807"
- startingLineNumber = "910"
- endingLineNumber = "910"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
- landmarkType = "9">
- </BreakpointContent>
- </BreakpointProxy>
- <BreakpointProxy
- BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
- <BreakpointContent
- uuid = "F6E2F2AB-21A2-4DC2-8B9C-59FF6326A2DC"
- shouldBeEnabled = "No"
- ignoreCount = "0"
- continueAfterRunningActions = "No"
- filePath = "../source/rsa-test.c"
- startingColumnNumber = "9223372036854775807"
- endingColumnNumber = "9223372036854775807"
- startingLineNumber = "948"
- endingLineNumber = "948"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
- landmarkType = "9">
- </BreakpointContent>
- </BreakpointProxy>
- <BreakpointProxy
- BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
- <BreakpointContent
uuid = "C8B7770D-BAC1-4203-AF94-A85245DE1081"
shouldBeEnabled = "No"
ignoreCount = "0"
@@ -3155,9 +3107,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "943"
- endingLineNumber = "943"
- landmarkName = "mont_verify_pairs_with_opencl(bases, exponents, moduli, signatures, n, pks, result)"
+ startingLineNumber = "952"
+ endingLineNumber = "952"
+ landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)"
landmarkType = "9">
<Locations>
<Location
@@ -3212,22 +3164,6 @@
<BreakpointProxy
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
- uuid = "2123D5D5-C635-404E-91CB-F962716A9093"
- shouldBeEnabled = "No"
- ignoreCount = "0"
- continueAfterRunningActions = "No"
- filePath = "../source/rsa-test.c"
- startingColumnNumber = "9223372036854775807"
- endingColumnNumber = "9223372036854775807"
- startingLineNumber = "1114"
- endingLineNumber = "1114"
- landmarkName = "mont_rsa_tests()"
- landmarkType = "9">
- </BreakpointContent>
- </BreakpointProxy>
- <BreakpointProxy
- BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
- <BreakpointContent
uuid = "8ECD43E8-C440-47F2-92EE-5C0EFBCD0487"
shouldBeEnabled = "No"
ignoreCount = "0"
@@ -3935,5 +3871,195 @@
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "CF9E9C13-FF32-4C51-9757-FE0110A980AF"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "695"
+ endingLineNumber = "695"
+ landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)"
+ landmarkType = "9">
+ </BreakpointContent>
+ </BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "730"
+ endingLineNumber = "730"
+ landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)"
+ landmarkType = "9">
+ <Locations>
+ <Location
+ uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851cae"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "729"
+ endingLineNumber = "729"
+ offsetFromSymbolStart = "517">
+ </Location>
+ <Location
+ uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851cae"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "729"
+ endingLineNumber = "729"
+ offsetFromSymbolStart = "525">
+ </Location>
+ <Location
+ uuid = "371A23E1-484F-42F6-B2EE-46D44AE6083B - 2e7ee53289851f4f"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "730"
+ endingLineNumber = "730"
+ offsetFromSymbolStart = "563">
+ </Location>
+ </Locations>
+ </BreakpointContent>
+ </BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "755"
+ endingLineNumber = "755"
+ landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)"
+ landmarkType = "9">
+ <Locations>
+ <Location
+ uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "751"
+ endingLineNumber = "751"
+ offsetFromSymbolStart = "780">
+ </Location>
+ <Location
+ uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "751"
+ endingLineNumber = "751"
+ offsetFromSymbolStart = "788">
+ </Location>
+ <Location
+ uuid = "7FF43810-FAE0-46C6-9842-A8D405C02B2A - 2e7ee53289851998"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "mont_pairs_from_files"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "751"
+ endingLineNumber = "751"
+ offsetFromSymbolStart = "796">
+ </Location>
+ </Locations>
+ </BreakpointContent>
+ </BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "096C0BB2-DC22-4EC5-8BD2-796A1B196075"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "766"
+ endingLineNumber = "766"
+ landmarkName = "mont_pairs_from_files(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n)"
+ landmarkType = "9">
+ </BreakpointContent>
+ </BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "4156F803-B4B2-4ACF-BBB1-7E71EC1C7D77"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "1119"
+ endingLineNumber = "1119"
+ landmarkName = "mont_rsa_tests()"
+ landmarkType = "9">
+ </BreakpointContent>
+ </BreakpointProxy>
+ <BreakpointProxy
+ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+ <BreakpointContent
+ uuid = "BE22DB13-E105-456F-8405-4A890569B49D"
+ shouldBeEnabled = "No"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ filePath = "../source/rsa-test.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "1065"
+ endingLineNumber = "1065"
+ landmarkName = "mont_verify_pairs_with_opencl(bases, b_off, exponents, e_off, moduli, m_off, signatures, s_off, n, pks, result)"
+ landmarkType = "9">
+ </BreakpointContent>
+ </BreakpointProxy>
</Breakpoints>
</Bucket>
diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl
@@ -2836,31 +2836,40 @@ void printmpz(mpz_t n) {
}
-__kernel void montgomery(__constant char *signature,
- __constant char *exponent,
- __constant char *modulus,
- __constant char *base,
- __global unsigned long *valid)
+__kernel void montgomery(__constant char *signature, __constant unsigned long *s_offsets,
+ __constant char *exponent, __constant unsigned long *e_offsets,
+ __constant char *modulus, __constant unsigned long *m_offsets,
+ __constant char *base, __constant unsigned long *b_offsets,
+ __global unsigned long *valid,
+ unsigned int n,
+ unsigned int pks)
{
int index = get_global_id(0);
- if (index == 0) {
+ if (index < n) {
int radix = 16;
- mpz_t b,e,m,res;
+ mpz_t b,e,m,sig,res;
+ mpz_init(res);
+
+ mpz_init_set_str(b,&base[b_offsets[0]],radix); // M
+ mpz_init_set_str(sig,&signature[s_offsets[0]],radix);
+
- mpz_init(res);
-
- mpz_init_set_str(b,base,radix); // M
- mpz_init_set_str(e,exponent,radix);
- mpz_init_set_str(m,modulus,radix); // n
+ if (pks == 1) {
+ mpz_init_set_str(e,exponent,radix);
+ mpz_init_set_str(m,modulus,radix); // n
+ } else {
+ mpz_init_set_str(e,&exponent[e_offsets[0]],radix);
+ mpz_init_set_str(m,&modulus[m_offsets[0]],radix); // n
+ }
mpz_t r, r_1, ni, M, x;
mpz_init(r); // MARK: I think I have to destroy these myself
@@ -2874,10 +2883,6 @@ __kernel void montgomery(__constant char *signature,
mpz_init(xx);
-
- printf((char __constant *)"%lu\n",GMP_LIMB_BITS);
-
-
if (mpz_even_p(m)) {
mpz_t bb, x1, x2, q, powj;
@@ -2921,40 +2926,18 @@ __kernel void montgomery(__constant char *signature,
} else {
-
-
mont_prepare(b, e, m, r, r_1, ni, M, x);
-
-
mont_modexp(xx, x, e, M, m, ni, r, r_1);
-
- // printf((char __constant *)"--\n");
-
- //printmpz(xx);
-
mont_finish(res, xx, m, ni, r, r_1);
- // printf((char __constant *)"--\n");
-
- // printmpz(res);
-
-
}
-
-
-
-
-
-
-
- mpz_t sig;
- mpz_init_set_str(sig,signature,radix);
-
+
+
if (mpz_cmp(sig,res) == 0) {
- *valid = 1;
+ *valid += 1;
} else {