commit 422f4db0f24c2630e75ff2c8c1028f6ecfdc21d5
parent e96d6ec827703c8213bc5ebc1321f2a4954166dd
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date: Mon, 1 Jan 2024 23:58:51 +0100
Refactor
Diffstat:
38 files changed, 1885 insertions(+), 10557 deletions(-)
diff --git a/source/Makefile b/source/Makefile
@@ -1,3 +1,10 @@
-all:
- gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c rsa-test.c montgomery-test.c reference-test.c montmodmult.c util.c gmp.c -lgcrypt -lgmp -lOpenCL -lm
+linux:
+ gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -lOpenCL -lm
+
+windows:
+ gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -lOpenCL -lm
+
+macos:
+ gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -framework OpenCL -lm
+
diff --git a/source/gmp.c b/source/gmp.c
@@ -1,4627 +0,0 @@
-/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
-
- Contributed to the GNU project by Niels Möller
- Additional functionalities and improvements by Marco Bodrato.
-
-Copyright 1991-1997, 1999-2022 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/* NOTE: All functions in this file which are not declared in
- mini-gmp.h are internal, and are not intended to be compatible
- with GMP or with future versions of mini-gmp. */
-
-/* Much of the material copied from GMP files, including: gmp-impl.h,
- longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
- mpn/generic/lshift.c, mpn/generic/mul_1.c,
- mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
- mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
- mpn/generic/submul_1.c. */
-
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "gmp.h"
-
-#if !defined(MINI_GMP_DONT_USE_FLOAT_H)
-#include <float.h>
-#endif
-
-
-/* Macros */
-#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
-
-#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
-#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
-
-#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
-#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)
-
-#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
-#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))
-
-#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
-#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
-
-#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
-#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))
-
-#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
-#define GMP_DBL_MANT_BITS DBL_MANT_DIG
-#else
-#define GMP_DBL_MANT_BITS (53)
-#endif
-
-/* Return non-zero if xp,xsize and yp,ysize overlap.
- If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
- overlap. If both these are false, there's an overlap. */
-#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \
- ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
-
-#define gmp_assert_nocarry(x) do { \
- mp_limb_t __cy = (x); \
- assert (__cy == 0); \
- (void) (__cy); \
- } while (0)
-
-#define gmp_clz(count, x) do { \
- mp_limb_t __clz_x = (x); \
- unsigned __clz_c = 0; \
- int LOCAL_SHIFT_BITS = 8; \
- if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \
- for (; \
- (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
- __clz_c += 8) \
- { __clz_x <<= LOCAL_SHIFT_BITS; } \
- for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \
- __clz_x <<= 1; \
- (count) = __clz_c; \
- } while (0)
-
-#define gmp_ctz(count, x) do { \
- mp_limb_t __ctz_x = (x); \
- unsigned __ctz_c = 0; \
- gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \
- (count) = GMP_LIMB_BITS - 1 - __ctz_c; \
- } while (0)
-
-#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) + (bl); \
- (sh) = (ah) + (bh) + (__x < (al)); \
- (sl) = __x; \
- } while (0)
-
-#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) - (bl); \
- (sh) = (ah) - (bh) - ((al) < (bl)); \
- (sl) = __x; \
- } while (0)
-
-#define gmp_umul_ppmm(w1, w0, u, v) \
- do { \
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \
- if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned int __ww = (unsigned int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned long int __ww = (unsigned long int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else { \
- mp_limb_t __x0, __x1, __x2, __x3; \
- unsigned __ul, __vl, __uh, __vh; \
- mp_limb_t __u = (u), __v = (v); \
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \
- \
- __ul = __u & GMP_LLIMB_MASK; \
- __uh = __u >> (GMP_LIMB_BITS / 2); \
- __vl = __v & GMP_LLIMB_MASK; \
- __vh = __v >> (GMP_LIMB_BITS / 2); \
- \
- __x0 = (mp_limb_t) __ul * __vl; \
- __x1 = (mp_limb_t) __ul * __vh; \
- __x2 = (mp_limb_t) __uh * __vl; \
- __x3 = (mp_limb_t) __uh * __vh; \
- \
- __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
- __x1 += __x2; /* but this indeed can */ \
- if (__x1 < __x2) /* did we get it? */ \
- __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \
- \
- (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
- (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
- } \
- } while (0)
-
-/* If mp_limb_t is of size smaller than int, plain u*v implies
- automatic promotion to *signed* int, and then multiply may overflow
- and cause undefined behavior. Explicitly cast to unsigned int for
- that case. */
-#define gmp_umullo_limb(u, v) \
- ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v))
-
-#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _qh, _ql, _r, _mask; \
- gmp_umul_ppmm (_qh, _ql, (nh), (di)); \
- gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - gmp_umullo_limb (_qh, (d)); \
- _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
- _qh += _mask; \
- _r += _mask & (d); \
- if (_r >= (d)) \
- { \
- _r -= (d); \
- _qh++; \
- } \
- \
- (r) = _r; \
- (q) = _qh; \
- } while (0)
-
-#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
- do { \
- mp_limb_t _q0, _t1, _t0, _mask; \
- gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \
- gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
- \
- /* Compute the two most significant limbs of n - q'd */ \
- (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
- gmp_umul_ppmm (_t1, _t0, (d0), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
- (q)++; \
- \
- /* Conditionally adjust q and the remainders */ \
- _mask = - (mp_limb_t) ((r1) >= _q0); \
- (q) += _mask; \
- gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
- if ((r1) >= (d1)) \
- { \
- if ((r1) > (d1) || (r0) >= (d0)) \
- { \
- (q)++; \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
- } \
- } \
- } while (0)
-
-/* Swap macros. */
-#define MP_LIMB_T_SWAP(x, y) \
- do { \
- mp_limb_t __mp_limb_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_limb_t_swap__tmp; \
- } while (0)
-#define MP_SIZE_T_SWAP(x, y) \
- do { \
- mp_size_t __mp_size_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_size_t_swap__tmp; \
- } while (0)
-#define MP_BITCNT_T_SWAP(x,y) \
- do { \
- mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_bitcnt_t_swap__tmp; \
- } while (0)
-#define MP_PTR_SWAP(x, y) \
- do { \
- mp_ptr __mp_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_ptr_swap__tmp; \
- } while (0)
-#define MP_SRCPTR_SWAP(x, y) \
- do { \
- mp_srcptr __mp_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_srcptr_swap__tmp; \
- } while (0)
-
-#define MPN_PTR_SWAP(xp,xs, yp,ys) \
- do { \
- MP_PTR_SWAP (xp, yp); \
- MP_SIZE_T_SWAP (xs, ys); \
- } while(0)
-#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
- do { \
- MP_SRCPTR_SWAP (xp, yp); \
- MP_SIZE_T_SWAP (xs, ys); \
- } while(0)
-
-#define MPZ_PTR_SWAP(x, y) \
- do { \
- mpz_ptr __mpz_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_ptr_swap__tmp; \
- } while (0)
-#define MPZ_SRCPTR_SWAP(x, y) \
- do { \
- mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_srcptr_swap__tmp; \
- } while (0)
-
-const int mp_bits_per_limb = GMP_LIMB_BITS;
-
-
-/* Memory allocation and other helper functions. */
-static void
-gmp_die (const char *msg)
-{
- fprintf (stderr, "%s\n", msg);
- abort();
-}
-
-static void *
-gmp_default_alloc (size_t size)
-{
- void *p;
-
- assert (size > 0);
-
- p = malloc (size);
- if (!p)
- gmp_die("gmp_default_alloc: Virtual memory exhausted.");
-
- return p;
-}
-
-static void *
-gmp_default_realloc (void *old, size_t unused_old_size, size_t new_size)
-{
- void * p;
-
- p = realloc (old, new_size);
-
- if (!p)
- gmp_die("gmp_default_realloc: Virtual memory exhausted.");
-
- return p;
-}
-
-static void
-gmp_default_free (void *p, size_t unused_size)
-{
- free (p);
-}
-
-static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
-static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
-static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
-
-void
-mp_get_memory_functions (void *(**alloc_func) (size_t),
- void *(**realloc_func) (void *, size_t, size_t),
- void (**free_func) (void *, size_t))
-{
- if (alloc_func)
- *alloc_func = gmp_allocate_func;
-
- if (realloc_func)
- *realloc_func = gmp_reallocate_func;
-
- if (free_func)
- *free_func = gmp_free_func;
-}
-
-void
-mp_set_memory_functions (void *(*alloc_func) (size_t),
- void *(*realloc_func) (void *, size_t, size_t),
- void (*free_func) (void *, size_t))
-{
- if (!alloc_func)
- alloc_func = gmp_default_alloc;
- if (!realloc_func)
- realloc_func = gmp_default_realloc;
- if (!free_func)
- free_func = gmp_default_free;
-
- gmp_allocate_func = alloc_func;
- gmp_reallocate_func = realloc_func;
- gmp_free_func = free_func;
-}
-
-#define gmp_alloc(size) ((*gmp_allocate_func)((size)))
-#define gmp_free(p, size) ((*gmp_free_func) ((p), (size)))
-#define gmp_realloc(ptr, old_size, size) ((*gmp_reallocate_func)(ptr, old_size, size))
-
-static mp_ptr
-gmp_alloc_limbs (mp_size_t size)
-{
- return (mp_ptr) gmp_alloc (size * sizeof (mp_limb_t));
-}
-
-static mp_ptr
-gmp_realloc_limbs (mp_ptr old, mp_size_t old_size, mp_size_t size)
-{
- assert (size > 0);
- return (mp_ptr) gmp_realloc (old, old_size * sizeof (mp_limb_t), size * sizeof (mp_limb_t));
-}
-
-static void
-gmp_free_limbs (mp_ptr old, mp_size_t size)
-{
- gmp_free (old, size * sizeof (mp_limb_t));
-}
-
-
-/* MPN interface */
-
-void
-mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- mp_size_t i;
- for (i = 0; i < n; i++)
- d[i] = s[i];
-}
-
-void
-mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- while (--n >= 0)
- d[n] = s[n];
-}
-
-int
-mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- while (--n >= 0)
- {
- if (ap[n] != bp[n])
- return ap[n] > bp[n] ? 1 : -1;
- }
- return 0;
-}
-
-static int
-mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- if (an != bn)
- return an < bn ? -1 : 1;
- else
- return mpn_cmp (ap, bp, an);
-}
-
-static mp_size_t
-mpn_normalized_size (mp_srcptr xp, mp_size_t n)
-{
- while (n > 0 && xp[n-1] == 0)
- --n;
- return n;
-}
-
-int
-mpn_zero_p(mp_srcptr rp, mp_size_t n)
-{
- return mpn_normalized_size (rp, n) == 0;
-}
-
-void
-mpn_zero (mp_ptr rp, mp_size_t n)
-{
- while (--n >= 0)
- rp[n] = 0;
-}
-
-mp_limb_t
-mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
- i = 0;
- do
- {
- mp_limb_t r = ap[i] + b;
- /* Carry out */
- b = (r < b);
- rp[i] = r;
- }
- while (++i < n);
-
- return b;
-}
-
-mp_limb_t
-mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b, r;
- a = ap[i]; b = bp[i];
- r = a + cy;
- cy = (r < cy);
- r += b;
- cy += (r < b);
- rp[i] = r;
- }
- return cy;
-}
-
-mp_limb_t
-mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_add_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-mp_limb_t
-mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
-
- i = 0;
- do
- {
- mp_limb_t a = ap[i];
- /* Carry out */
- mp_limb_t cy = a < b;
- rp[i] = a - b;
- b = cy;
- }
- while (++i < n);
-
- return b;
-}
-
-mp_limb_t
-mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b;
- a = ap[i]; b = bp[i];
- b += cy;
- cy = (b < cy);
- cy += (a < b);
- rp[i] = a - b;
- }
- return cy;
-}
-
-mp_limb_t
-mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_sub_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-mp_limb_t
-mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl + lpl;
- cl += lpl < rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-mp_limb_t
-mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl - lpl;
- cl += lpl > rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-mp_limb_t
-mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
-{
- assert (un >= vn);
- assert (vn >= 1);
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));
-
- /* We first multiply by the low order limb. This result can be
- stored, not added, to rp. We also avoid a loop for zeroing this
- way. */
-
- rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
-
- /* Now accumulate the product of up[] and the next higher limb from
- vp[]. */
-
- while (--vn >= 1)
- {
- rp += 1, vp += 1;
- rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
- }
- return rp[un];
-}
-
-void
-mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mpn_mul (rp, ap, n, bp, n);
-}
-
-void
-mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
-{
- mpn_mul (rp, ap, n, ap, n);
-}
-
-mp_limb_t
-mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- up += n;
- rp += n;
-
- tnc = GMP_LIMB_BITS - cnt;
- low_limb = *--up;
- retval = low_limb >> tnc;
- high_limb = (low_limb << cnt);
-
- while (--n != 0)
- {
- low_limb = *--up;
- *--rp = high_limb | (low_limb >> tnc);
- high_limb = (low_limb << cnt);
- }
- *--rp = high_limb;
-
- return retval;
-}
-
-mp_limb_t
-mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- tnc = GMP_LIMB_BITS - cnt;
- high_limb = *up++;
- retval = (high_limb << tnc);
- low_limb = high_limb >> cnt;
-
- while (--n != 0)
- {
- high_limb = *up++;
- *rp++ = low_limb | (high_limb << tnc);
- low_limb = high_limb >> cnt;
- }
- *rp = low_limb;
-
- return retval;
-}
-
-static mp_bitcnt_t
-mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
- mp_limb_t ux)
-{
- unsigned cnt;
-
- assert (ux == 0 || ux == GMP_LIMB_MAX);
- assert (0 <= i && i <= un );
-
- while (limb == 0)
- {
- i++;
- if (i == un)
- return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
- limb = ux ^ up[i];
- }
- gmp_ctz (cnt, limb);
- return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
-}
-
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
-{
- mp_size_t i;
- i = bit / GMP_LIMB_BITS;
-
- return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
- i, ptr, i, 0);
-}
-
-mp_bitcnt_t
-mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit)
-{
- mp_size_t i;
- i = bit / GMP_LIMB_BITS;
-
- return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
- i, ptr, i, GMP_LIMB_MAX);
-}
-
-void
-mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
- while (--n >= 0)
- *rp++ = ~ *up++;
-}
-
-mp_limb_t
-mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
- while (*up == 0)
- {
- *rp = 0;
- if (!--n)
- return 0;
- ++up; ++rp;
- }
- *rp = - *up;
- mpn_com (++rp, ++up, --n);
- return 1;
-}
-
-
-/* MPN division interface. */
-
-/* The 3/2 inverse is defined as
-
- m = floor( (B^3-1) / (B u1 + u0)) - B
-*/
-mp_limb_t
-mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
-{
- mp_limb_t r, m;
-
- {
- mp_limb_t p, ql;
- unsigned ul, uh, qh;
-
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t));
- /* For notation, let b denote the half-limb base, so that B = b^2.
- Split u1 = b uh + ul. */
- ul = u1 & GMP_LLIMB_MASK;
- uh = u1 >> (GMP_LIMB_BITS / 2);
-
- /* Approximation of the high half of quotient. Differs from the 2/1
- inverse of the half limb uh, since we have already subtracted
- u0. */
- qh = (u1 ^ GMP_LIMB_MAX) / uh;
-
- /* Adjust to get a half-limb 3/2 inverse, i.e., we want
-
- qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
- = floor( (b (~u) + b-1) / u),
-
- and the remainder
-
- r = b (~u) + b-1 - qh (b uh + ul)
- = b (~u - qh uh) + b-1 - qh ul
-
- Subtraction of qh ul may underflow, which implies adjustments.
- But by normalization, 2 u >= B > qh ul, so we need to adjust by
- at most 2.
- */
-
- r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
-
- p = (mp_limb_t) qh * ul;
- /* Adjustment steps taken from udiv_qrnnd_c */
- if (r < p)
- {
- qh--;
- r += u1;
- if (r >= u1) /* i.e. we didn't get carry when adding to r */
- if (r < p)
- {
- qh--;
- r += u1;
- }
- }
- r -= p;
-
- /* Low half of the quotient is
-
- ql = floor ( (b r + b-1) / u1).
-
- This is a 3/2 division (on half-limbs), for which qh is a
- suitable inverse. */
-
- p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
- /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
- work, it is essential that ql is a full mp_limb_t. */
- ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
-
- /* By the 3/2 trick, we don't need the high half limb. */
- r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
-
- if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
- {
- ql--;
- r += u1;
- }
- m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
- if (r >= u1)
- {
- m++;
- r -= u1;
- }
- }
-
- /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
- 3/2 inverse. */
- if (u0 > 0)
- {
- mp_limb_t th, tl;
- r = ~r;
- r += u0;
- if (r < u0)
- {
- m--;
- if (r >= u1)
- {
- m--;
- r -= u1;
- }
- r -= u1;
- }
- gmp_umul_ppmm (th, tl, u0, m);
- r += th;
- if (r < th)
- {
- m--;
- m -= ((r > u1) | ((r == u1) & (tl > u0)));
- }
- }
-
- return m;
-}
-
-struct gmp_div_inverse
-{
- /* Normalization shift count. */
- unsigned shift;
- /* Normalized divisor (d0 unused for mpn_div_qr_1) */
- mp_limb_t d1, d0;
- /* Inverse, for 2/1 or 3/2. */
- mp_limb_t di;
-};
-
-static void
-mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
-{
- unsigned shift;
-
- assert (d > 0);
- gmp_clz (shift, d);
- inv->shift = shift;
- inv->d1 = d << shift;
- inv->di = mpn_invert_limb (inv->d1);
-}
-
-static void
-mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
- mp_limb_t d1, mp_limb_t d0)
-{
- unsigned shift;
-
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 <<= shift;
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
-}
-
-static void
-mpn_div_qr_invert (struct gmp_div_inverse *inv,
- mp_srcptr dp, mp_size_t dn)
-{
- assert (dn > 0);
-
- if (dn == 1)
- mpn_div_qr_1_invert (inv, dp[0]);
- else if (dn == 2)
- mpn_div_qr_2_invert (inv, dp[1], dp[0]);
- else
- {
- unsigned shift;
- mp_limb_t d1, d0;
-
- d1 = dp[dn-1];
- d0 = dp[dn-2];
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
- }
-}
-
-/* Not matching current public gmp interface, rather corresponding to
- the sbpi1_div_* functions. */
-static mp_limb_t
-mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- mp_limb_t d, di;
- mp_limb_t r;
- mp_ptr tp = NULL;
- mp_size_t tn = 0;
-
- if (inv->shift > 0)
- {
- /* Shift, reusing qp area if possible. In-place shift if qp == np. */
- tp = qp;
- if (!tp)
- {
- tn = nn;
- tp = gmp_alloc_limbs (tn);
- }
- r = mpn_lshift (tp, np, nn, inv->shift);
- np = tp;
- }
- else
- r = 0;
-
- d = inv->d1;
- di = inv->di;
- while (--nn >= 0)
- {
- mp_limb_t q;
-
- gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
- if (qp)
- qp[nn] = q;
- }
- if (tn)
- gmp_free_limbs (tp, tn);
-
- return r >> inv->shift;
-}
-
-static void
-mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- unsigned shift;
- mp_size_t i;
- mp_limb_t d1, d0, di, r1, r0;
-
- assert (nn >= 2);
- shift = inv->shift;
- d1 = inv->d1;
- d0 = inv->d0;
- di = inv->di;
-
- if (shift > 0)
- r1 = mpn_lshift (np, np, nn, shift);
- else
- r1 = 0;
-
- r0 = np[nn - 1];
-
- i = nn - 2;
- do
- {
- mp_limb_t n0, q;
- n0 = np[i];
- gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- if (shift > 0)
- {
- assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
- r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
- r1 >>= shift;
- }
-
- np[1] = r1;
- np[0] = r0;
-}
-
-static void
-mpn_div_qr_pi1 (mp_ptr qp,
- mp_ptr np, mp_size_t nn, mp_limb_t n1,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_size_t i;
-
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
-
- assert (dn > 2);
- assert (nn >= dn);
-
- d1 = dp[dn - 1];
- d0 = dp[dn - 2];
-
- assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
- /* Iteration variable is the index of the q limb.
- *
- * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
- * by <d1, d0, dp[dn-3], ..., dp[0] >
- */
-
- i = nn - dn;
- do
- {
- mp_limb_t n0 = np[dn-1+i];
-
- if (n1 == d1 && n0 == d0)
- {
- q = GMP_LIMB_MAX;
- mpn_submul_1 (np+i, dp, dn, q);
- n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
-
- cy = mpn_submul_1 (np + i, dp, dn-2, q);
-
- cy1 = n0 < cy;
- n0 = n0 - cy;
- cy = n1 < cy1;
- n1 = n1 - cy1;
- np[dn-2+i] = n0;
-
- if (cy != 0)
- {
- n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
- q--;
- }
- }
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- np[dn - 1] = n1;
-}
-
-static void
-mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- const struct gmp_div_inverse *inv)
-{
- assert (dn > 0);
- assert (nn >= dn);
-
- if (dn == 1)
- np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
- else if (dn == 2)
- mpn_div_qr_2_preinv (qp, np, nn, inv);
- else
- {
- mp_limb_t nh;
- unsigned shift;
-
- assert (inv->d1 == dp[dn-1]);
- assert (inv->d0 == dp[dn-2]);
- assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
-
- shift = inv->shift;
- if (shift > 0)
- nh = mpn_lshift (np, np, nn, shift);
- else
- nh = 0;
-
- mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);
-
- if (shift > 0)
- gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
- }
-}
-
-static void
-mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
-{
- struct gmp_div_inverse inv;
- mp_ptr tp = NULL;
-
- assert (dn > 0);
- assert (nn >= dn);
-
- mpn_div_qr_invert (&inv, dp, dn);
- if (dn > 2 && inv.shift > 0)
- {
- tp = gmp_alloc_limbs (dn);
- gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift));
- dp = tp;
- }
- mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
- if (tp)
- gmp_free_limbs (tp, dn);
-}
-
-
-/* MPN base conversion. */
-static unsigned
-mpn_base_power_of_two_p (unsigned b)
-{
- switch (b)
- {
- case 2: return 1;
- case 4: return 2;
- case 8: return 3;
- case 16: return 4;
- case 32: return 5;
- case 64: return 6;
- case 128: return 7;
- case 256: return 8;
- default: return 0;
- }
-}
-
-struct mpn_base_info
-{
- /* bb is the largest power of the base which fits in one limb, and
- exp is the corresponding exponent. */
- unsigned exp;
- mp_limb_t bb;
-};
-
-static void
-mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
-{
- mp_limb_t m;
- mp_limb_t p;
- unsigned exp;
-
- m = GMP_LIMB_MAX / b;
- for (exp = 1, p = b; p <= m; exp++)
- p *= b;
-
- info->exp = exp;
- info->bb = p;
-}
-
-static mp_bitcnt_t
-mpn_limb_size_in_base_2 (mp_limb_t u)
-{
- unsigned shift;
-
- assert (u > 0);
- gmp_clz (shift, u);
- return GMP_LIMB_BITS - shift;
-}
-
-static size_t
-mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
-{
- unsigned char mask;
- size_t sn, j;
- mp_size_t i;
- unsigned shift;
-
- sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
- + bits - 1) / bits;
-
- mask = (1U << bits) - 1;
-
- for (i = 0, j = sn, shift = 0; j-- > 0;)
- {
- unsigned char digit = up[i] >> shift;
-
- shift += bits;
-
- if (shift >= GMP_LIMB_BITS && ++i < un)
- {
- shift -= GMP_LIMB_BITS;
- digit |= up[i] << (bits - shift);
- }
- sp[j] = digit & mask;
- }
- return sn;
-}
-
-/* We generate digits from the least significant end, and reverse at
- the end. */
-static size_t
-mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
- const struct gmp_div_inverse *binv)
-{
- mp_size_t i;
- for (i = 0; w > 0; i++)
- {
- mp_limb_t h, l, r;
-
- h = w >> (GMP_LIMB_BITS - binv->shift);
- l = w << binv->shift;
-
- gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di);
- assert ((r & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - binv->shift))) == 0);
- r >>= binv->shift;
-
- sp[i] = r;
- }
- return i;
-}
-
-static size_t
-mpn_get_str_other (unsigned char *sp,
- int base, const struct mpn_base_info *info,
- mp_ptr up, mp_size_t un)
-{
- struct gmp_div_inverse binv;
- size_t sn;
- size_t i;
-
- mpn_div_qr_1_invert (&binv, base);
-
- sn = 0;
-
- if (un > 1)
- {
- struct gmp_div_inverse bbinv;
- mpn_div_qr_1_invert (&bbinv, info->bb);
-
- do
- {
- mp_limb_t w;
- size_t done;
- w = mpn_div_qr_1_preinv (up, up, un, &bbinv);
- un -= (up[un-1] == 0);
- done = mpn_limb_get_str (sp + sn, w, &binv);
-
- for (sn += done; done < info->exp; done++)
- sp[sn++] = 0;
- }
- while (un > 1);
- }
- sn += mpn_limb_get_str (sp + sn, up[0], &binv);
-
- /* Reverse order */
- for (i = 0; 2*i + 1 < sn; i++)
- {
- unsigned char t = sp[i];
- sp[i] = sp[sn - i - 1];
- sp[sn - i - 1] = t;
- }
-
- return sn;
-}
-
-size_t
-mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
-{
- unsigned bits;
-
- assert (un > 0);
- assert (up[un-1] > 0);
-
- bits = mpn_base_power_of_two_p (base);
- if (bits)
- return mpn_get_str_bits (sp, bits, up, un);
- else
- {
- struct mpn_base_info info;
-
- mpn_get_base_info (&info, base);
- return mpn_get_str_other (sp, base, &info, up, un);
- }
-}
-
-static mp_size_t
-mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
- unsigned bits)
-{
- mp_size_t rn;
- mp_limb_t limb;
- unsigned shift;
-
- for (limb = 0, rn = 0, shift = 0; sn-- > 0; )
- {
- limb |= (mp_limb_t) sp[sn] << shift;
- shift += bits;
- if (shift >= GMP_LIMB_BITS)
- {
- shift -= GMP_LIMB_BITS;
- rp[rn++] = limb;
- /* Next line is correct also if shift == 0,
- bits == 8, and mp_limb_t == unsigned char. */
- limb = (unsigned int) sp[sn] >> (bits - shift);
- }
- }
- if (limb != 0)
- rp[rn++] = limb;
- else
- rn = mpn_normalized_size (rp, rn);
- return rn;
-}
-
-/* Result is usually normalized, except for all-zero input, in which
- case a single zero limb is written at *RP, and 1 is returned. */
-static mp_size_t
-mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
- mp_limb_t b, const struct mpn_base_info *info)
-{
- mp_size_t rn;
- mp_limb_t w;
- unsigned k;
- size_t j;
-
- assert (sn > 0);
-
- k = 1 + (sn - 1) % info->exp;
-
- j = 0;
- w = sp[j++];
- while (--k != 0)
- w = w * b + sp[j++];
-
- rp[0] = w;
-
- for (rn = 1; j < sn;)
- {
- mp_limb_t cy;
-
- w = sp[j++];
- for (k = 1; k < info->exp; k++)
- w = w * b + sp[j++];
-
- cy = mpn_mul_1 (rp, rp, rn, info->bb);
- cy += mpn_add_1 (rp, rp, rn, w);
- if (cy > 0)
- rp[rn++] = cy;
- }
- assert (j == sn);
-
- return rn;
-}
-
-mp_size_t
-mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
-{
- unsigned bits;
-
- if (sn == 0)
- return 0;
-
- bits = mpn_base_power_of_two_p (base);
- if (bits)
- return mpn_set_str_bits (rp, sp, sn, bits);
- else
- {
- struct mpn_base_info info;
-
- mpn_get_base_info (&info, base);
- return mpn_set_str_other (rp, sp, sn, base, &info);
- }
-}
-
-
-/* MPZ interface */
-void
-mpz_init (mpz_t r)
-{
- static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;
-
- r->_mp_alloc = 0;
- r->_mp_size = 0;
- r->_mp_d = (mp_ptr) &dummy_limb;
-}
-
-/* The utility of this function is a bit limited, since many functions
- assigns the result variable using mpz_swap. */
-void
-mpz_init2 (mpz_t r, mp_bitcnt_t bits)
-{
- mp_size_t rn;
-
- bits -= (bits != 0); /* Round down, except if 0 */
- rn = 1 + bits / GMP_LIMB_BITS;
-
- r->_mp_alloc = rn;
- r->_mp_size = 0;
- r->_mp_d = gmp_alloc_limbs (rn);
-}
-
-void
-mpz_clear (mpz_t r)
-{
- if (r->_mp_alloc)
- gmp_free_limbs (r->_mp_d, r->_mp_alloc);
-}
-
-static mp_ptr
-mpz_realloc (mpz_t r, mp_size_t size)
-{
- size = GMP_MAX (size, 1);
-
- if (r->_mp_alloc)
- r->_mp_d = gmp_realloc_limbs (r->_mp_d, r->_mp_alloc, size);
- else
- r->_mp_d = gmp_alloc_limbs (size);
- r->_mp_alloc = size;
-
- if (GMP_ABS (r->_mp_size) > size)
- r->_mp_size = 0;
-
- return r->_mp_d;
-}
-
-/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
-#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc \
- ? mpz_realloc(z,n) \
- : (z)->_mp_d)
-
-/* MPZ assignment and basic conversions. */
-void
-mpz_set_si (mpz_t r, signed long int x)
-{
- if (x >= 0)
- mpz_set_ui (r, x);
- else /* (x < 0) */
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
- mpz_neg (r, r);
- }
- else
- {
- r->_mp_size = -1;
- MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
- }
-}
-
-void
-mpz_set_ui (mpz_t r, unsigned long int x)
-{
- if (x > 0)
- {
- r->_mp_size = 1;
- MPZ_REALLOC (r, 1)[0] = x;
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- while (x >>= LOCAL_GMP_LIMB_BITS)
- {
- ++ r->_mp_size;
- MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
- }
- }
- }
- else
- r->_mp_size = 0;
-}
-
-void
-mpz_set (mpz_t r, const mpz_t x)
-{
- /* Allow the NOP r == x */
- if (r != x)
- {
- mp_size_t n;
- mp_ptr rp;
-
- n = GMP_ABS (x->_mp_size);
- rp = MPZ_REALLOC (r, n);
-
- mpn_copyi (rp, x->_mp_d, n);
- r->_mp_size = x->_mp_size;
- }
-}
-
-void
-mpz_init_set_si (mpz_t r, signed long int x)
-{
- mpz_init (r);
- mpz_set_si (r, x);
-}
-
-void
-mpz_init_set_ui (mpz_t r, unsigned long int x)
-{
- mpz_init (r);
- mpz_set_ui (r, x);
-}
-
-void
-mpz_init_set (mpz_t r, const mpz_t x)
-{
- mpz_init (r);
- mpz_set (r, x);
-}
-
-int
-mpz_fits_slong_p (const mpz_t u)
-{
- return mpz_cmp_si (u, LONG_MAX) <= 0 && mpz_cmp_si (u, LONG_MIN) >= 0;
-}
-
-static int
-mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
-{
- int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
- mp_limb_t ulongrem = 0;
-
- if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
- ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1;
-
- return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
-}
-
-int
-mpz_fits_ulong_p (const mpz_t u)
-{
- mp_size_t us = u->_mp_size;
-
- return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us);
-}
-
-int
-mpz_fits_sint_p (const mpz_t u)
-{
- return mpz_cmp_si (u, INT_MAX) <= 0 && mpz_cmp_si (u, INT_MIN) >= 0;
-}
-
-int
-mpz_fits_uint_p (const mpz_t u)
-{
- return u->_mp_size >= 0 && mpz_cmpabs_ui (u, UINT_MAX) <= 0;
-}
-
-int
-mpz_fits_sshort_p (const mpz_t u)
-{
- return mpz_cmp_si (u, SHRT_MAX) <= 0 && mpz_cmp_si (u, SHRT_MIN) >= 0;
-}
-
-int
-mpz_fits_ushort_p (const mpz_t u)
-{
- return u->_mp_size >= 0 && mpz_cmpabs_ui (u, USHRT_MAX) <= 0;
-}
-
-long int
-mpz_get_si (const mpz_t u)
-{
- unsigned long r = mpz_get_ui (u);
- unsigned long c = -LONG_MAX - LONG_MIN;
-
- if (u->_mp_size < 0)
- /* This expression is necessary to properly handle -LONG_MIN */
- return -(long) c - (long) ((r - c) & LONG_MAX);
- else
- return (long) (r & LONG_MAX);
-}
-
-unsigned long int
-mpz_get_ui (const mpz_t u)
-{
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- unsigned long r = 0;
- mp_size_t n = GMP_ABS (u->_mp_size);
- n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
- while (--n >= 0)
- r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
- return r;
- }
-
- return u->_mp_size == 0 ? 0 : u->_mp_d[0];
-}
-
-size_t
-mpz_size (const mpz_t u)
-{
- return GMP_ABS (u->_mp_size);
-}
-
-mp_limb_t
-mpz_getlimbn (const mpz_t u, mp_size_t n)
-{
- if (n >= 0 && n < GMP_ABS (u->_mp_size))
- return u->_mp_d[n];
- else
- return 0;
-}
-
-void
-mpz_realloc2 (mpz_t x, mp_bitcnt_t n)
-{
- mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS);
-}
-
-mp_srcptr
-mpz_limbs_read (mpz_srcptr x)
-{
- return x->_mp_d;
-}
-
-mp_ptr
-mpz_limbs_modify (mpz_t x, mp_size_t n)
-{
- assert (n > 0);
- return MPZ_REALLOC (x, n);
-}
-
-mp_ptr
-mpz_limbs_write (mpz_t x, mp_size_t n)
-{
- return mpz_limbs_modify (x, n);
-}
-
-void
-mpz_limbs_finish (mpz_t x, mp_size_t xs)
-{
- mp_size_t xn;
- xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs));
- x->_mp_size = xs < 0 ? -xn : xn;
-}
-
-static mpz_srcptr
-mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
-{
- x->_mp_alloc = 0;
- x->_mp_d = (mp_ptr) xp;
- x->_mp_size = xs;
- return x;
-}
-
-mpz_srcptr
-mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
-{
- mpz_roinit_normal_n (x, xp, xs);
- mpz_limbs_finish (x, xs);
- return x;
-}
-
-
-/* Conversions and comparison to double. */
-void
-mpz_set_d (mpz_t r, double x)
-{
- int sign;
- mp_ptr rp;
- mp_size_t rn, i;
- double B;
- double Bi;
- mp_limb_t f;
-
- /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
- zero or infinity. */
- if (x != x || x == x * 0.5)
- {
- r->_mp_size = 0;
- return;
- }
-
- sign = x < 0.0 ;
- if (sign)
- x = - x;
-
- if (x < 1.0)
- {
- r->_mp_size = 0;
- return;
- }
- B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
- Bi = 1.0 / B;
- for (rn = 1; x >= B; rn++)
- x *= Bi;
-
- rp = MPZ_REALLOC (r, rn);
-
- f = (mp_limb_t) x;
- x -= f;
- assert (x < 1.0);
- i = rn-1;
- rp[i] = f;
- while (--i >= 0)
- {
- x = B * x;
- f = (mp_limb_t) x;
- x -= f;
- assert (x < 1.0);
- rp[i] = f;
- }
-
- r->_mp_size = sign ? - rn : rn;
-}
-
-void
-mpz_init_set_d (mpz_t r, double x)
-{
- mpz_init (r);
- mpz_set_d (r, x);
-}
-
-double
-mpz_get_d (const mpz_t u)
-{
- int m;
- mp_limb_t l;
- mp_size_t un;
- double x;
- double B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
-
- un = GMP_ABS (u->_mp_size);
-
- if (un == 0)
- return 0.0;
-
- l = u->_mp_d[--un];
- gmp_clz (m, l);
- m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
- if (m < 0)
- l &= GMP_LIMB_MAX << -m;
-
- for (x = l; --un >= 0;)
- {
- x = B*x;
- if (m > 0) {
- l = u->_mp_d[un];
- m -= GMP_LIMB_BITS;
- if (m < 0)
- l &= GMP_LIMB_MAX << -m;
- x += l;
- }
- }
-
- if (u->_mp_size < 0)
- x = -x;
-
- return x;
-}
-
-int
-mpz_cmpabs_d (const mpz_t x, double d)
-{
- mp_size_t xn;
- double B, Bi;
- mp_size_t i;
-
- xn = x->_mp_size;
- d = GMP_ABS (d);
-
- if (xn != 0)
- {
- xn = GMP_ABS (xn);
-
- B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
- Bi = 1.0 / B;
-
- /* Scale d so it can be compared with the top limb. */
- for (i = 1; i < xn; i++)
- d *= Bi;
-
- if (d >= B)
- return -1;
-
- /* Compare floor(d) to top limb, subtract and cancel when equal. */
- for (i = xn; i-- > 0;)
- {
- mp_limb_t f, xl;
-
- f = (mp_limb_t) d;
- xl = x->_mp_d[i];
- if (xl > f)
- return 1;
- else if (xl < f)
- return -1;
- d = B * (d - f);
- }
- }
- return - (d > 0.0);
-}
-
-int
-mpz_cmp_d (const mpz_t x, double d)
-{
- if (x->_mp_size < 0)
- {
- if (d >= 0.0)
- return -1;
- else
- return -mpz_cmpabs_d (x, d);
- }
- else
- {
- if (d < 0.0)
- return 1;
- else
- return mpz_cmpabs_d (x, d);
- }
-}
-
-
-/* MPZ comparisons and the like. */
-int
-mpz_sgn (const mpz_t u)
-{
- return GMP_CMP (u->_mp_size, 0);
-}
-
-int
-mpz_cmp_si (const mpz_t u, long v)
-{
- mp_size_t usize = u->_mp_size;
-
- if (v >= 0)
- return mpz_cmp_ui (u, v);
- else if (usize >= 0)
- return 1;
- else
- return - mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, v));
-}
-
-int
-mpz_cmp_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t usize = u->_mp_size;
-
- if (usize < 0)
- return -1;
- else
- return mpz_cmpabs_ui (u, v);
-}
-
-int
-mpz_cmp (const mpz_t a, const mpz_t b)
-{
- mp_size_t asize = a->_mp_size;
- mp_size_t bsize = b->_mp_size;
-
- if (asize != bsize)
- return (asize < bsize) ? -1 : 1;
- else if (asize >= 0)
- return mpn_cmp (a->_mp_d, b->_mp_d, asize);
- else
- return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
-}
-
-int
-mpz_cmpabs_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t un = GMP_ABS (u->_mp_size);
-
- if (! mpn_absfits_ulong_p (u->_mp_d, un))
- return 1;
- else
- {
- unsigned long uu = mpz_get_ui (u);
- return GMP_CMP(uu, v);
- }
-}
-
-int
-mpz_cmpabs (const mpz_t u, const mpz_t v)
-{
- return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
- v->_mp_d, GMP_ABS (v->_mp_size));
-}
-
-void
-mpz_abs (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = GMP_ABS (r->_mp_size);
-}
-
-void
-mpz_neg (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = -r->_mp_size;
-}
-
-void
-mpz_swap (mpz_t u, mpz_t v)
-{
- MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
- MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size);
-}
-
-
-/* MPZ addition and subtraction */
-
-
-void
-mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_t bb;
- mpz_init_set_ui (bb, b);
- mpz_add (r, a, bb);
- mpz_clear (bb);
-}
-
-void
-mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_ui_sub (r, b, a);
- mpz_neg (r, r);
-}
-
-void
-mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
-{
- mpz_neg (r, b);
- mpz_add_ui (r, r, a);
-}
-
-static mp_size_t
-mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- mp_ptr rp;
- mp_limb_t cy;
-
- if (an < bn)
- {
- MPZ_SRCPTR_SWAP (a, b);
- MP_SIZE_T_SWAP (an, bn);
- }
-
- rp = MPZ_REALLOC (r, an + 1);
- cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
-
- rp[an] = cy;
-
- return an + cy;
-}
-
-static mp_size_t
-mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- int cmp;
- mp_ptr rp;
-
- cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
- if (cmp > 0)
- {
- rp = MPZ_REALLOC (r, an);
- gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
- return mpn_normalized_size (rp, an);
- }
- else if (cmp < 0)
- {
- rp = MPZ_REALLOC (r, bn);
- gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
- return -mpn_normalized_size (rp, bn);
- }
- else
- return 0;
-}
-
-void
-mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_add (r, a, b);
- else
- rn = mpz_abs_sub (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-void
-mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_sub (r, a, b);
- else
- rn = mpz_abs_add (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-
-/* MPZ multiplication */
-void
-mpz_mul_si (mpz_t r, const mpz_t u, long int v)
-{
- if (v < 0)
- {
- mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
- mpz_neg (r, r);
- }
- else
- mpz_mul_ui (r, u, v);
-}
-
-void
-mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
-{
- mpz_t vv;
- mpz_init_set_ui (vv, v);
- mpz_mul (r, u, vv);
- mpz_clear (vv);
- return;
-}
-
-void
-mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- int sign;
- mp_size_t un, vn, rn;
- mpz_t t;
- mp_ptr tp;
-
- un = u->_mp_size;
- vn = v->_mp_size;
-
- if (un == 0 || vn == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- sign = (un ^ vn) < 0;
-
- un = GMP_ABS (un);
- vn = GMP_ABS (vn);
-
- mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
-
- tp = t->_mp_d;
- if (un >= vn)
- mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
- else
- mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
-
- rn = un + vn;
- rn -= tp[rn-1] == 0;
-
- t->_mp_size = sign ? - rn : rn;
- mpz_swap (r, t);
- mpz_clear (t);
-}
-
-void
-mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
-{
- mp_size_t un, rn;
- mp_size_t limbs;
- unsigned shift;
- mp_ptr rp;
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- limbs = bits / GMP_LIMB_BITS;
- shift = bits % GMP_LIMB_BITS;
-
- rn = un + limbs + (shift > 0);
- rp = MPZ_REALLOC (r, rn);
- if (shift > 0)
- {
- mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
- rp[rn-1] = cy;
- rn -= (cy == 0);
- }
- else
- mpn_copyd (rp + limbs, u->_mp_d, un);
-
- mpn_zero (rp, limbs);
-
- r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
-}
-
-void
-mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
-{
- mpz_t t;
- mpz_init_set_ui (t, v);
- mpz_mul (t, u, t);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-void
-mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v)
-{
- mpz_t t;
- mpz_init_set_ui (t, v);
- mpz_mul (t, u, t);
- mpz_sub (r, r, t);
- mpz_clear (t);
-}
-
-void
-mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mpz_t t;
- mpz_init (t);
- mpz_mul (t, u, v);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-void
-mpz_submul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mpz_t t;
- mpz_init (t);
- mpz_mul (t, u, v);
- mpz_sub (r, r, t);
- mpz_clear (t);
-}
-
-
-/* MPZ division */
-enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
-
-/* Allows q or r to be zero. Returns 1 iff remainder is non-zero. */
-static int
-mpz_div_qr (mpz_t q, mpz_t r,
- const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
-{
- mp_size_t ns, ds, nn, dn, qs;
- ns = n->_mp_size;
- ds = d->_mp_size;
-
- if (ds == 0)
- gmp_die("mpz_div_qr: Divide by zero.");
-
- if (ns == 0)
- {
- if (q)
- q->_mp_size = 0;
- if (r)
- r->_mp_size = 0;
- return 0;
- }
-
- nn = GMP_ABS (ns);
- dn = GMP_ABS (ds);
-
- qs = ds ^ ns;
-
- if (nn < dn)
- {
- if (mode == GMP_DIV_CEIL && qs >= 0)
- {
- /* q = 1, r = n - d */
- if (r)
- mpz_sub (r, n, d);
- if (q)
- mpz_set_ui (q, 1);
- }
- else if (mode == GMP_DIV_FLOOR && qs < 0)
- {
- /* q = -1, r = n + d */
- if (r)
- mpz_add (r, n, d);
- if (q)
- mpz_set_si (q, -1);
- }
- else
- {
- /* q = 0, r = d */
- if (r)
- mpz_set (r, n);
- if (q)
- q->_mp_size = 0;
- }
- return 1;
- }
- else
- {
- mp_ptr np, qp;
- mp_size_t qn, rn;
- mpz_t tq, tr;
-
- mpz_init_set (tr, n);
- np = tr->_mp_d;
-
- qn = nn - dn + 1;
-
- if (q)
- {
- mpz_init2 (tq, qn * GMP_LIMB_BITS);
- qp = tq->_mp_d;
- }
- else
- qp = NULL;
-
- mpn_div_qr (qp, np, nn, d->_mp_d, dn);
-
- if (qp)
- {
- qn -= (qp[qn-1] == 0);
-
- tq->_mp_size = qs < 0 ? -qn : qn;
- }
- rn = mpn_normalized_size (np, dn);
- tr->_mp_size = ns < 0 ? - rn : rn;
-
- if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
- {
- if (q)
- mpz_sub_ui (tq, tq, 1);
- if (r)
- mpz_add (tr, tr, d);
- }
- else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
- {
- if (q)
- mpz_add_ui (tq, tq, 1);
- if (r)
- mpz_sub (tr, tr, d);
- }
-
- if (q)
- {
- mpz_swap (tq, q);
- mpz_clear (tq);
- }
- if (r)
- mpz_swap (tr, r);
-
- mpz_clear (tr);
-
- return rn != 0;
- }
-}
-
-void
-mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
-}
-
-void
-mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
-}
-
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
-}
-
-void
-mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
-}
-
-void
-mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
-}
-
-void
-mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
-}
-
-void
-mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
-}
-
-void
-mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
-}
-
-void
-mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
-}
-
-void
-mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
-}
-
-static void
-mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode)
-{
- mp_size_t un, qn;
- mp_size_t limb_cnt;
- mp_ptr qp;
- int adjust;
-
- un = u->_mp_size;
- if (un == 0)
- {
- q->_mp_size = 0;
- return;
- }
- limb_cnt = bit_index / GMP_LIMB_BITS;
- qn = GMP_ABS (un) - limb_cnt;
- bit_index %= GMP_LIMB_BITS;
-
- if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
- /* Note: Below, the final indexing at limb_cnt is valid because at
- that point we have qn > 0. */
- adjust = (qn <= 0
- || !mpn_zero_p (u->_mp_d, limb_cnt)
- || (u->_mp_d[limb_cnt]
- & (((mp_limb_t) 1 << bit_index) - 1)));
- else
- adjust = 0;
-
- if (qn <= 0)
- qn = 0;
- else
- {
- qp = MPZ_REALLOC (q, qn);
-
- if (bit_index != 0)
- {
- mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
- qn -= qp[qn - 1] == 0;
- }
- else
- {
- mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
- }
- }
-
- q->_mp_size = qn;
-
- if (adjust)
- mpz_add_ui (q, q, 1);
- if (un < 0)
- mpz_neg (q, q);
-}
-
-static void
-mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode)
-{
- mp_size_t us, un, rn;
- mp_ptr rp;
- mp_limb_t mask;
-
- us = u->_mp_size;
- if (us == 0 || bit_index == 0)
- {
- r->_mp_size = 0;
- return;
- }
- rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
- assert (rn > 0);
-
- rp = MPZ_REALLOC (r, rn);
- un = GMP_ABS (us);
-
- mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);
-
- if (rn > un)
- {
- /* Quotient (with truncation) is zero, and remainder is
- non-zero */
- if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
- {
- /* Have to negate and sign extend. */
- mp_size_t i;
-
- gmp_assert_nocarry (! mpn_neg (rp, u->_mp_d, un));
- for (i = un; i < rn - 1; i++)
- rp[i] = GMP_LIMB_MAX;
-
- rp[rn-1] = mask;
- us = -us;
- }
- else
- {
- /* Just copy */
- if (r != u)
- mpn_copyi (rp, u->_mp_d, un);
-
- rn = un;
- }
- }
- else
- {
- if (r != u)
- mpn_copyi (rp, u->_mp_d, rn - 1);
-
- rp[rn-1] = u->_mp_d[rn-1] & mask;
-
- if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
- {
- /* If r != 0, compute 2^{bit_count} - r. */
- mpn_neg (rp, rp, rn);
-
- rp[rn-1] &= mask;
-
- /* us is not used for anything else, so we can modify it
- here to indicate flipped sign. */
- us = -us;
- }
- }
- rn = mpn_normalized_size (rp, rn);
- r->_mp_size = us < 0 ? -rn : rn;
-}
-
-void
-mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
-}
-
-void
-mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
-}
-
-void
-mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
-}
-
-void
-mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
-}
-
-void
-mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
-}
-
-void
-mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
-}
-
-void
-mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
-{
- gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
-}
-
-int
-mpz_divisible_p (const mpz_t n, const mpz_t d)
-{
- return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
-}
-
-int
-mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m)
-{
- mpz_t t;
- int res;
-
- /* a == b (mod 0) iff a == b */
- if (mpz_sgn (m) == 0)
- return (mpz_cmp (a, b) == 0);
-
- mpz_init (t);
- mpz_sub (t, a, b);
- res = mpz_divisible_p (t, m);
- mpz_clear (t);
-
- return res;
-}
-
-static unsigned long
-mpz_div_qr_ui (mpz_t q, mpz_t r,
- const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
-{
- unsigned long ret;
- mpz_t rr, dd;
-
- mpz_init (rr);
- mpz_init_set_ui (dd, d);
- mpz_div_qr (q, rr, n, dd, mode);
- mpz_clear (dd);
- ret = mpz_get_ui (rr);
-
- if (r)
- mpz_swap (r, rr);
- mpz_clear (rr);
-
- return ret;
-}
-
-unsigned long
-mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
-}
-
-unsigned long
-mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
-}
-
-unsigned long
-mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
-}
-
-unsigned long
-mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
-}
-
-unsigned long
-mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
-}
-
-unsigned long
-mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
-}
-
-unsigned long
-mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
-}
-unsigned long
-mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
-}
-unsigned long
-mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
-}
-
-unsigned long
-mpz_cdiv_ui (const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
-}
-
-unsigned long
-mpz_fdiv_ui (const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
-}
-
-unsigned long
-mpz_tdiv_ui (const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
-}
-
-unsigned long
-mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
-}
-
-void
-mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
-{
- gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
-}
-
-int
-mpz_divisible_ui_p (const mpz_t n, unsigned long d)
-{
- return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
-}
-
-
-/* GCD */
-static mp_limb_t
-mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
-{
- unsigned shift;
-
- assert ( (u | v) > 0);
-
- if (u == 0)
- return v;
- else if (v == 0)
- return u;
-
- gmp_ctz (shift, u | v);
-
- u >>= shift;
- v >>= shift;
-
- if ( (u & 1) == 0)
- MP_LIMB_T_SWAP (u, v);
-
- while ( (v & 1) == 0)
- v >>= 1;
-
- while (u != v)
- {
- if (u > v)
- {
- u -= v;
- do
- u >>= 1;
- while ( (u & 1) == 0);
- }
- else
- {
- v -= u;
- do
- v >>= 1;
- while ( (v & 1) == 0);
- }
- }
- return u << shift;
-}
-
-unsigned long
-mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
-{
- mpz_t t;
- mpz_init_set_ui(t, v);
- mpz_gcd (t, u, t);
- if (v > 0)
- v = mpz_get_ui (t);
-
- if (g)
- mpz_swap (t, g);
-
- mpz_clear (t);
-
- return v;
-}
-
-static mp_bitcnt_t
-mpz_make_odd (mpz_t r)
-{
- mp_bitcnt_t shift;
-
- assert (r->_mp_size > 0);
- /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
- shift = mpn_scan1 (r->_mp_d, 0);
- mpz_tdiv_q_2exp (r, r, shift);
-
- return shift;
-}
-
-void
-mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
-{
- mpz_t tu, tv;
- mp_bitcnt_t uz, vz, gz;
-
- if (u->_mp_size == 0)
- {
- mpz_abs (g, v);
- return;
- }
- if (v->_mp_size == 0)
- {
- mpz_abs (g, u);
- return;
- }
-
- mpz_init (tu);
- mpz_init (tv);
-
- mpz_abs (tu, u);
- uz = mpz_make_odd (tu);
- mpz_abs (tv, v);
- vz = mpz_make_odd (tv);
- gz = GMP_MIN (uz, vz);
-
- if (tu->_mp_size < tv->_mp_size)
- mpz_swap (tu, tv);
-
- mpz_tdiv_r (tu, tu, tv);
- if (tu->_mp_size == 0)
- {
- mpz_swap (g, tv);
- }
- else
- for (;;)
- {
- int c;
-
- mpz_make_odd (tu);
- c = mpz_cmp (tu, tv);
- if (c == 0)
- {
- mpz_swap (g, tu);
- break;
- }
- if (c < 0)
- mpz_swap (tu, tv);
-
- if (tv->_mp_size == 1)
- {
- mp_limb_t *gp;
-
- mpz_tdiv_r (tu, tu, tv);
- gp = MPZ_REALLOC (g, 1); /* gp = mpz_limbs_modify (g, 1); */
- *gp = mpn_gcd_11 (tu->_mp_d[0], tv->_mp_d[0]);
-
- g->_mp_size = *gp != 0; /* mpz_limbs_finish (g, 1); */
- break;
- }
- mpz_sub (tu, tu, tv);
- }
- mpz_clear (tu);
- mpz_clear (tv);
- mpz_mul_2exp (g, g, gz);
-}
-
-void
-mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
-{
- mpz_t tu, tv, s0, s1, t0, t1;
- mp_bitcnt_t uz, vz, gz;
- mp_bitcnt_t power;
-
- if (u->_mp_size == 0)
- {
- /* g = 0 u + sgn(v) v */
- signed long sign = mpz_sgn (v);
- mpz_abs (g, v);
- if (s)
- s->_mp_size = 0;
- if (t)
- mpz_set_si (t, sign);
- return;
- }
-
- if (v->_mp_size == 0)
- {
- /* g = sgn(u) u + 0 v */
- signed long sign = mpz_sgn (u);
- mpz_abs (g, u);
- if (s)
- mpz_set_si (s, sign);
- if (t)
- t->_mp_size = 0;
- return;
- }
-
- mpz_init (tu);
- mpz_init (tv);
- mpz_init (s0);
- mpz_init (s1);
- mpz_init (t0);
- mpz_init (t1);
-
- mpz_abs (tu, u);
- uz = mpz_make_odd (tu);
- mpz_abs (tv, v);
- vz = mpz_make_odd (tv);
- gz = GMP_MIN (uz, vz);
-
- uz -= gz;
- vz -= gz;
-
- /* Cofactors corresponding to odd gcd. gz handled later. */
- if (tu->_mp_size < tv->_mp_size)
- {
- mpz_swap (tu, tv);
- MPZ_SRCPTR_SWAP (u, v);
- MPZ_PTR_SWAP (s, t);
- MP_BITCNT_T_SWAP (uz, vz);
- }
-
- /* Maintain
- *
- * u = t0 tu + t1 tv
- * v = s0 tu + s1 tv
- *
- * where u and v denote the inputs with common factors of two
- * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
- *
- * 2^p tu = s1 u - t1 v
- * 2^p tv = -s0 u + t0 v
- */
-
- /* After initial division, tu = q tv + tu', we have
- *
- * u = 2^uz (tu' + q tv)
- * v = 2^vz tv
- *
- * or
- *
- * t0 = 2^uz, t1 = 2^uz q
- * s0 = 0, s1 = 2^vz
- */
-
- mpz_tdiv_qr (t1, tu, tu, tv);
- mpz_mul_2exp (t1, t1, uz);
-
- mpz_setbit (s1, vz);
- power = uz + vz;
-
- if (tu->_mp_size > 0)
- {
- mp_bitcnt_t shift;
- shift = mpz_make_odd (tu);
- mpz_setbit (t0, uz + shift);
- power += shift;
-
- for (;;)
- {
- int c;
- c = mpz_cmp (tu, tv);
- if (c == 0)
- break;
-
- if (c < 0)
- {
- /* tv = tv' + tu
- *
- * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
- * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
-
- mpz_sub (tv, tv, tu);
- mpz_add (t0, t0, t1);
- mpz_add (s0, s0, s1);
-
- shift = mpz_make_odd (tv);
- mpz_mul_2exp (t1, t1, shift);
- mpz_mul_2exp (s1, s1, shift);
- }
- else
- {
- mpz_sub (tu, tu, tv);
- mpz_add (t1, t0, t1);
- mpz_add (s1, s0, s1);
-
- shift = mpz_make_odd (tu);
- mpz_mul_2exp (t0, t0, shift);
- mpz_mul_2exp (s0, s0, shift);
- }
- power += shift;
- }
- }
- else
- mpz_setbit (t0, uz);
-
- /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
- cofactors. */
-
- mpz_mul_2exp (tv, tv, gz);
- mpz_neg (s0, s0);
-
- /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
- adjust cofactors, we need u / g and v / g */
-
- mpz_divexact (s1, v, tv);
- mpz_abs (s1, s1);
- mpz_divexact (t1, u, tv);
- mpz_abs (t1, t1);
-
- while (power-- > 0)
- {
- /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
- if (mpz_odd_p (s0) || mpz_odd_p (t0))
- {
- mpz_sub (s0, s0, s1);
- mpz_add (t0, t0, t1);
- }
- assert (mpz_even_p (t0) && mpz_even_p (s0));
- mpz_tdiv_q_2exp (s0, s0, 1);
- mpz_tdiv_q_2exp (t0, t0, 1);
- }
-
- /* Arrange so that |s| < |u| / 2g */
- mpz_add (s1, s0, s1);
- if (mpz_cmpabs (s0, s1) > 0)
- {
- mpz_swap (s0, s1);
- mpz_sub (t0, t0, t1);
- }
- if (u->_mp_size < 0)
- mpz_neg (s0, s0);
- if (v->_mp_size < 0)
- mpz_neg (t0, t0);
-
- mpz_swap (g, tv);
- if (s)
- mpz_swap (s, s0);
- if (t)
- mpz_swap (t, t0);
-
- mpz_clear (tu);
- mpz_clear (tv);
- mpz_clear (s0);
- mpz_clear (s1);
- mpz_clear (t0);
- mpz_clear (t1);
-}
-
-void
-mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mpz_t g;
-
- if (u->_mp_size == 0 || v->_mp_size == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- mpz_init (g);
-
- mpz_gcd (g, u, v);
- mpz_divexact (g, u, g);
- mpz_mul (r, g, v);
-
- mpz_clear (g);
- mpz_abs (r, r);
-}
-
-void
-mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
-{
- if (v == 0 || u->_mp_size == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- v /= mpz_gcd_ui (NULL, u, v);
- mpz_mul_ui (r, u, v);
-
- mpz_abs (r, r);
-}
-
-int
-mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
-{
- mpz_t g, tr;
- int invertible;
-
- if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
- return 0;
-
- mpz_init (g);
- mpz_init (tr);
-
- mpz_gcdext (g, tr, NULL, u, m);
- invertible = (mpz_cmp_ui (g, 1) == 0);
-
- if (invertible)
- {
- if (tr->_mp_size < 0)
- {
- if (m->_mp_size >= 0)
- mpz_add (tr, tr, m);
- else
- mpz_sub (tr, tr, m);
- }
- mpz_swap (r, tr);
- }
-
- mpz_clear (g);
- mpz_clear (tr);
- return invertible;
-}
-
-
-/* Higher level operations (sqrt, pow and root) */
-
-void
-mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
-{
- unsigned long bit;
- mpz_t tr;
- mpz_init_set_ui (tr, 1);
-
- bit = GMP_ULONG_HIGHBIT;
- do
- {
- mpz_mul (tr, tr, tr);
- if (e & bit)
- mpz_mul (tr, tr, b);
- bit >>= 1;
- }
- while (bit > 0);
-
- mpz_swap (r, tr);
- mpz_clear (tr);
-}
-
-void
-mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
-{
- mpz_t b;
-
- mpz_init_set_ui (b, blimb);
- mpz_pow_ui (r, b, e);
- mpz_clear (b);
-}
-
-void
-mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
-{
- mpz_t tr;
- mpz_t base;
- mp_size_t en, mn;
- mp_srcptr mp;
- struct gmp_div_inverse minv;
- unsigned shift;
- mp_ptr tp = NULL;
-
- en = GMP_ABS (e->_mp_size);
- mn = GMP_ABS (m->_mp_size);
- if (mn == 0)
- gmp_die ("mpz_powm: Zero modulo.");
-
- if (en == 0)
- {
- mpz_set_ui (r, mpz_cmpabs_ui (m, 1));
- return;
- }
-
- mp = m->_mp_d;
- mpn_div_qr_invert (&minv, mp, mn);
- shift = minv.shift;
-
- if (shift > 0)
- {
- /* To avoid shifts, we do all our reductions, except the final
- one, using a *normalized* m. */
- minv.shift = 0;
-
- tp = gmp_alloc_limbs (mn);
- gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
- mp = tp;
- }
-
- mpz_init (base);
-
- if (e->_mp_size < 0)
- {
- if (!mpz_invert (base, b, m))
- gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
- }
- else
- {
- mp_size_t bn;
- mpz_abs (base, b);
-
- bn = base->_mp_size;
- if (bn >= mn)
- {
- mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
- bn = mn;
- }
-
- /* We have reduced the absolute value. Now take care of the
- sign. Note that we get zero represented non-canonically as
- m. */
- if (b->_mp_size < 0)
- {
- mp_ptr bp = MPZ_REALLOC (base, mn);
- gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
- bn = mn;
- }
- base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
- }
- mpz_init_set_ui (tr, 1);
-
- while (--en >= 0)
- {
- mp_limb_t w = e->_mp_d[en];
- mp_limb_t bit;
-
- bit = GMP_LIMB_HIGHBIT;
- do
- {
- mpz_mul (tr, tr, tr);
- if (w & bit)
- mpz_mul (tr, tr, base);
- if (tr->_mp_size > mn)
- {
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- bit >>= 1;
- }
- while (bit > 0);
- }
-
- /* Final reduction */
- if (tr->_mp_size >= mn)
- {
- minv.shift = shift;
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- if (tp)
- gmp_free_limbs (tp, mn);
-
- mpz_swap (r, tr);
- mpz_clear (tr);
- mpz_clear (base);
-}
-
-void
-mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
-{
- mpz_t e;
-
- mpz_init_set_ui (e, elimb);
- mpz_powm (r, b, e, m);
- mpz_clear (e);
-}
-
-/* x=trunc(y^(1/z)), r=y-x^z */
-void
-mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
-{
- int sgn;
- mp_bitcnt_t bc;
- mpz_t t, u;
-
- sgn = y->_mp_size < 0;
- if ((~z & sgn) != 0)
- gmp_die ("mpz_rootrem: Negative argument, with even root.");
- if (z == 0)
- gmp_die ("mpz_rootrem: Zeroth root.");
-
- if (mpz_cmpabs_ui (y, 1) <= 0) {
- if (x)
- mpz_set (x, y);
- if (r)
- r->_mp_size = 0;
- return;
- }
-
- mpz_init (u);
- mpz_init (t);
- bc = (mpz_sizeinbase (y, 2) - 1) / z + 1;
- mpz_setbit (t, bc);
-
- if (z == 2) /* simplify sqrt loop: z-1 == 1 */
- do {
- mpz_swap (u, t); /* u = x */
- mpz_tdiv_q (t, y, u); /* t = y/x */
- mpz_add (t, t, u); /* t = y/x + x */
- mpz_tdiv_q_2exp (t, t, 1); /* x'= (y/x + x)/2 */
- } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */
- else /* z != 2 */ {
- mpz_t v;
-
- mpz_init (v);
- if (sgn)
- mpz_neg (t, t);
-
- do {
- mpz_swap (u, t); /* u = x */
- mpz_pow_ui (t, u, z - 1); /* t = x^(z-1) */
- mpz_tdiv_q (t, y, t); /* t = y/x^(z-1) */
- mpz_mul_ui (v, u, z - 1); /* v = x*(z-1) */
- mpz_add (t, t, v); /* t = y/x^(z-1) + x*(z-1) */
- mpz_tdiv_q_ui (t, t, z); /* x'=(y/x^(z-1) + x*(z-1))/z */
- } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */
-
- mpz_clear (v);
- }
-
- if (r) {
- mpz_pow_ui (t, u, z);
- mpz_sub (r, y, t);
- }
- if (x)
- mpz_swap (x, u);
- mpz_clear (u);
- mpz_clear (t);
-}
-
-int
-mpz_root (mpz_t x, const mpz_t y, unsigned long z)
-{
- int res;
- mpz_t r;
-
- mpz_init (r);
- mpz_rootrem (x, r, y, z);
- res = r->_mp_size == 0;
- mpz_clear (r);
-
- return res;
-}
-
-/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL */
-void
-mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
-{
- mpz_rootrem (s, r, u, 2);
-}
-
-void
-mpz_sqrt (mpz_t s, const mpz_t u)
-{
- mpz_rootrem (s, NULL, u, 2);
-}
-
-int
-mpz_perfect_square_p (const mpz_t u)
-{
- if (u->_mp_size <= 0)
- return (u->_mp_size == 0);
- else
- return mpz_root (NULL, u, 2);
-}
-
-int
-mpn_perfect_square_p (mp_srcptr p, mp_size_t n)
-{
- mpz_t t;
-
- assert (n > 0);
- assert (p [n-1] != 0);
- return mpz_root (NULL, mpz_roinit_normal_n (t, p, n), 2);
-}
-
-mp_size_t
-mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n)
-{
- mpz_t s, r, u;
- mp_size_t res;
-
- assert (n > 0);
- assert (p [n-1] != 0);
-
- mpz_init (r);
- mpz_init (s);
- mpz_rootrem (s, r, mpz_roinit_normal_n (u, p, n), 2);
-
- assert (s->_mp_size == (n+1)/2);
- mpn_copyd (sp, s->_mp_d, s->_mp_size);
- mpz_clear (s);
- res = r->_mp_size;
- if (rp)
- mpn_copyd (rp, r->_mp_d, res);
- mpz_clear (r);
- return res;
-}
-
-/* Combinatorics */
-
-void
-mpz_mfac_uiui (mpz_t x, unsigned long n, unsigned long m)
-{
- mpz_set_ui (x, n + (n == 0));
- if (m + 1 < 2) return;
- while (n > m + 1)
- mpz_mul_ui (x, x, n -= m);
-}
-
-void
-mpz_2fac_ui (mpz_t x, unsigned long n)
-{
- mpz_mfac_uiui (x, n, 2);
-}
-
-void
-mpz_fac_ui (mpz_t x, unsigned long n)
-{
- mpz_mfac_uiui (x, n, 1);
-}
-
-void
-mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
-{
- mpz_t t;
-
- mpz_set_ui (r, k <= n);
-
- if (k > (n >> 1))
- k = (k <= n) ? n - k : 0;
-
- mpz_init (t);
- mpz_fac_ui (t, k);
-
- for (; k > 0; --k)
- mpz_mul_ui (r, r, n--);
-
- mpz_divexact (r, r, t);
- mpz_clear (t);
-}
-
-
-/* Primality testing */
-
-/* Computes Kronecker (a/b) with odd b, a!=0 and GCD(a,b) = 1 */
-/* Adapted from JACOBI_BASE_METHOD==4 in mpn/generic/jacbase.c */
-static int
-gmp_jacobi_coprime (mp_limb_t a, mp_limb_t b)
-{
- int c, bit = 0;
-
- assert (b & 1);
- assert (a != 0);
- /* assert (mpn_gcd_11 (a, b) == 1); */
-
- /* Below, we represent a and b shifted right so that the least
- significant one bit is implicit. */
- b >>= 1;
-
- gmp_ctz(c, a);
- a >>= 1;
-
- for (;;)
- {
- a >>= c;
- /* (2/b) = -1 if b = 3 or 5 mod 8 */
- bit ^= c & (b ^ (b >> 1));
- if (a < b)
- {
- if (a == 0)
- return bit & 1 ? -1 : 1;
- bit ^= a & b;
- a = b - a;
- b -= a;
- }
- else
- {
- a -= b;
- assert (a != 0);
- }
-
- gmp_ctz(c, a);
- ++c;
- }
-}
-
-static void
-gmp_lucas_step_k_2k (mpz_t V, mpz_t Qk, const mpz_t n)
-{
- mpz_mod (Qk, Qk, n);
- /* V_{2k} <- V_k ^ 2 - 2Q^k */
- mpz_mul (V, V, V);
- mpz_submul_ui (V, Qk, 2);
- mpz_tdiv_r (V, V, n);
- /* Q^{2k} = (Q^k)^2 */
- mpz_mul (Qk, Qk, Qk);
-}
-
-/* Computes V_k, Q^k (mod n) for the Lucas' sequence */
-/* with P=1, Q=Q; k = (n>>b0)|1. */
-/* Requires an odd n > 4; b0 > 0; -2*Q must not overflow a long */
-/* Returns (U_k == 0) and sets V=V_k and Qk=Q^k. */
-static int
-gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q,
- mp_bitcnt_t b0, const mpz_t n)
-{
- mp_bitcnt_t bs;
- mpz_t U;
- int res;
-
- assert (b0 > 0);
- assert (Q <= - (LONG_MIN / 2));
- assert (Q >= - (LONG_MAX / 2));
- assert (mpz_cmp_ui (n, 4) > 0);
- assert (mpz_odd_p (n));
-
- mpz_init_set_ui (U, 1); /* U1 = 1 */
- mpz_set_ui (V, 1); /* V1 = 1 */
- mpz_set_si (Qk, Q);
-
- for (bs = mpz_sizeinbase (n, 2) - 1; --bs >= b0;)
- {
- /* U_{2k} <- U_k * V_k */
- mpz_mul (U, U, V);
- /* V_{2k} <- V_k ^ 2 - 2Q^k */
- /* Q^{2k} = (Q^k)^2 */
- gmp_lucas_step_k_2k (V, Qk, n);
-
- /* A step k->k+1 is performed if the bit in $n$ is 1 */
- /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but */
- /* should be 1 in $n+1$ (bs == b0) */
- if (b0 == bs || mpz_tstbit (n, bs))
- {
- /* Q^{k+1} <- Q^k * Q */
- mpz_mul_si (Qk, Qk, Q);
- /* U_{k+1} <- (U_k + V_k) / 2 */
- mpz_swap (U, V); /* Keep in V the old value of U_k */
- mpz_add (U, U, V);
- /* We have to compute U/2, so we need an even value, */
- /* equivalent (mod n) */
- if (mpz_odd_p (U))
- mpz_add (U, U, n);
- mpz_tdiv_q_2exp (U, U, 1);
- /* V_{k+1} <-(D*U_k + V_k) / 2 =
- U_{k+1} + (D-1)/2*U_k = U_{k+1} - 2Q*U_k */
- mpz_mul_si (V, V, -2*Q);
- mpz_add (V, U, V);
- mpz_tdiv_r (V, V, n);
- }
- mpz_tdiv_r (U, U, n);
- }
-
- res = U->_mp_size == 0;
- mpz_clear (U);
- return res;
-}
-
-/* Performs strong Lucas' test on x, with parameters suggested */
-/* for the BPSW test. Qk is only passed to recycle a variable. */
-/* Requires GCD (x,6) = 1.*/
-static int
-gmp_stronglucas (const mpz_t x, mpz_t Qk)
-{
- mp_bitcnt_t b0;
- mpz_t V, n;
- mp_limb_t maxD, D; /* The absolute value is stored. */
- long Q;
- mp_limb_t tl;
-
- /* Test on the absolute value. */
- mpz_roinit_normal_n (n, x->_mp_d, GMP_ABS (x->_mp_size));
-
- assert (mpz_odd_p (n));
- /* assert (mpz_gcd_ui (NULL, n, 6) == 1); */
- if (mpz_root (Qk, n, 2))
- return 0; /* A square is composite. */
-
- /* Check Ds up to square root (in case, n is prime)
- or avoid overflows */
- maxD = (Qk->_mp_size == 1) ? Qk->_mp_d [0] - 1 : GMP_LIMB_MAX;
-
- D = 3;
- /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,.. */
- /* For those Ds we have (D/n) = (n/|D|) */
- do
- {
- if (D >= maxD)
- return 1 + (D != GMP_LIMB_MAX); /* (1 + ! ~ D) */
- D += 2;
- tl = mpz_tdiv_ui (n, D);
- if (tl == 0)
- return 0;
- }
- while (gmp_jacobi_coprime (tl, D) == 1);
-
- mpz_init (V);
-
- /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */
- b0 = mpn_common_scan (~ n->_mp_d[0], 0, n->_mp_d, n->_mp_size, GMP_LIMB_MAX);
- /* b0 = mpz_scan0 (n, 0); */
-
- /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */
- Q = (D & 2) ? (long) (D >> 2) + 1 : -(long) (D >> 2);
-
- if (! gmp_lucas_mod (V, Qk, Q, b0, n)) /* If Ud != 0 */
- while (V->_mp_size != 0 && --b0 != 0) /* while Vk != 0 */
- /* V <- V ^ 2 - 2Q^k */
- /* Q^{2k} = (Q^k)^2 */
- gmp_lucas_step_k_2k (V, Qk, n);
-
- mpz_clear (V);
- return (b0 != 0);
-}
-
-static int
-gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y,
- const mpz_t q, mp_bitcnt_t k)
-{
- assert (k > 0);
-
- /* Caller must initialize y to the base. */
- mpz_powm (y, y, q, n);
-
- if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
- return 1;
-
- while (--k > 0)
- {
- mpz_powm_ui (y, y, 2, n);
- if (mpz_cmp (y, nm1) == 0)
- return 1;
- }
- return 0;
-}
-
-/* This product is 0xc0cfd797, and fits in 32 bits. */
-#define GMP_PRIME_PRODUCT \
- (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL)
-
-/* Bit (p+1)/2 is set, for each odd prime <= 61 */
-#define GMP_PRIME_MASK 0xc96996dcUL
-
-int
-mpz_probab_prime_p (const mpz_t n, int reps)
-{
- mpz_t nm1;
- mpz_t q;
- mpz_t y;
- mp_bitcnt_t k;
- int is_prime;
- int j;
-
- /* Note that we use the absolute value of n only, for compatibility
- with the real GMP. */
- if (mpz_even_p (n))
- return (mpz_cmpabs_ui (n, 2) == 0) ? 2 : 0;
-
- /* Above test excludes n == 0 */
- assert (n->_mp_size != 0);
-
- if (mpz_cmpabs_ui (n, 64) < 0)
- return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2;
-
- if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1)
- return 0;
-
- /* All prime factors are >= 31. */
- if (mpz_cmpabs_ui (n, 31*31) < 0)
- return 2;
-
- mpz_init (nm1);
- mpz_init (q);
-
- /* Find q and k, where q is odd and n = 1 + 2**k * q. */
- mpz_abs (nm1, n);
- nm1->_mp_d[0] -= 1;
- /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
- k = mpn_scan1 (nm1->_mp_d, 0);
- mpz_tdiv_q_2exp (q, nm1, k);
-
- /* BPSW test */
- mpz_init_set_ui (y, 2);
- is_prime = gmp_millerrabin (n, nm1, y, q, k) && gmp_stronglucas (n, y);
- reps -= 24; /* skip the first 24 repetitions */
-
- /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] =
- j^2 + j + 41 using Euler's polynomial. We potentially stop early,
- if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps >
- 30 (a[30] == 971 > 31*31 == 961). */
-
- for (j = 0; is_prime & (j < reps); j++)
- {
- mpz_set_ui (y, (unsigned long) j*j+j+41);
- if (mpz_cmp (y, nm1) >= 0)
- {
- /* Don't try any further bases. This "early" break does not affect
- the result for any reasonable reps value (<=5000 was tested) */
- assert (j >= 30);
- break;
- }
- is_prime = gmp_millerrabin (n, nm1, y, q, k);
- }
- mpz_clear (nm1);
- mpz_clear (q);
- mpz_clear (y);
-
- return is_prime;
-}
-
-
-/* Logical operations and bit manipulation. */
-
-/* Numbers are treated as if represented in two's complement (and
- infinitely sign extended). For a negative values we get the two's
- complement from -x = ~x + 1, where ~ is bitwise complement.
- Negation transforms
-
- xxxx10...0
-
- into
-
- yyyy10...0
-
- where yyyy is the bitwise complement of xxxx. So least significant
- bits, up to and including the first one bit, are unchanged, and
- the more significant bits are all complemented.
-
- To change a bit from zero to one in a negative number, subtract the
- corresponding power of two from the absolute value. This can never
- underflow. To change a bit from one to zero, add the corresponding
- power of two, and this might overflow. E.g., if x = -001111, the
- two's complement is 110001. Clearing the least significant bit, we
- get two's complement 110000, and -010000. */
-
-int
-mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t limb_index;
- unsigned shift;
- mp_size_t ds;
- mp_size_t dn;
- mp_limb_t w;
- int bit;
-
- ds = d->_mp_size;
- dn = GMP_ABS (ds);
- limb_index = bit_index / GMP_LIMB_BITS;
- if (limb_index >= dn)
- return ds < 0;
-
- shift = bit_index % GMP_LIMB_BITS;
- w = d->_mp_d[limb_index];
- bit = (w >> shift) & 1;
-
- if (ds < 0)
- {
- /* d < 0. Check if any of the bits below is set: If so, our bit
- must be complemented. */
- if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
- return bit ^ 1;
- while (--limb_index >= 0)
- if (d->_mp_d[limb_index] > 0)
- return bit ^ 1;
- }
- return bit;
-}
-
-static void
-mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_limb_t bit;
- mp_ptr dp;
-
- dn = GMP_ABS (d->_mp_size);
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- if (limb_index >= dn)
- {
- mp_size_t i;
- /* The bit should be set outside of the end of the number.
- We have to increase the size of the number. */
- dp = MPZ_REALLOC (d, limb_index + 1);
-
- dp[limb_index] = bit;
- for (i = dn; i < limb_index; i++)
- dp[i] = 0;
- dn = limb_index + 1;
- }
- else
- {
- mp_limb_t cy;
-
- dp = d->_mp_d;
-
- cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
- if (cy > 0)
- {
- dp = MPZ_REALLOC (d, dn + 1);
- dp[dn++] = cy;
- }
- }
-
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-static void
-mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_ptr dp;
- mp_limb_t bit;
-
- dn = GMP_ABS (d->_mp_size);
- dp = d->_mp_d;
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- assert (limb_index < dn);
-
- gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
- dn - limb_index, bit));
- dn = mpn_normalized_size (dp, dn);
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-void
-mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
-{
- if (!mpz_tstbit (d, bit_index))
- {
- if (d->_mp_size >= 0)
- mpz_abs_add_bit (d, bit_index);
- else
- mpz_abs_sub_bit (d, bit_index);
- }
-}
-
-void
-mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
-{
- if (mpz_tstbit (d, bit_index))
- {
- if (d->_mp_size >= 0)
- mpz_abs_sub_bit (d, bit_index);
- else
- mpz_abs_add_bit (d, bit_index);
- }
-}
-
-void
-mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
-{
- if (mpz_tstbit (d, bit_index) ^ (d->_mp_size < 0))
- mpz_abs_sub_bit (d, bit_index);
- else
- mpz_abs_add_bit (d, bit_index);
-}
-
-void
-mpz_com (mpz_t r, const mpz_t u)
-{
- mpz_add_ui (r, u, 1);
- mpz_neg (r, r);
-}
-
-void
-mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mp_size_t un, vn, rn, i;
- mp_ptr up, vp, rp;
-
- mp_limb_t ux, vx, rx;
- mp_limb_t uc, vc, rc;
- mp_limb_t ul, vl, rl;
-
- un = GMP_ABS (u->_mp_size);
- vn = GMP_ABS (v->_mp_size);
- if (un < vn)
- {
- MPZ_SRCPTR_SWAP (u, v);
- MP_SIZE_T_SWAP (un, vn);
- }
- if (vn == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- uc = u->_mp_size < 0;
- vc = v->_mp_size < 0;
- rc = uc & vc;
-
- ux = -uc;
- vx = -vc;
- rx = -rc;
-
- /* If the smaller input is positive, higher limbs don't matter. */
- rn = vx ? un : vn;
-
- rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);
-
- up = u->_mp_d;
- vp = v->_mp_d;
-
- i = 0;
- do
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- vl = (vp[i] ^ vx) + vc;
- vc = vl < vc;
-
- rl = ( (ul & vl) ^ rx) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- while (++i < vn);
- assert (vc == 0);
-
- for (; i < rn; i++)
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- rl = ( (ul & vx) ^ rx) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- if (rc)
- rp[rn++] = rc;
- else
- rn = mpn_normalized_size (rp, rn);
-
- r->_mp_size = rx ? -rn : rn;
-}
-
-void
-mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mp_size_t un, vn, rn, i;
- mp_ptr up, vp, rp;
-
- mp_limb_t ux, vx, rx;
- mp_limb_t uc, vc, rc;
- mp_limb_t ul, vl, rl;
-
- un = GMP_ABS (u->_mp_size);
- vn = GMP_ABS (v->_mp_size);
- if (un < vn)
- {
- MPZ_SRCPTR_SWAP (u, v);
- MP_SIZE_T_SWAP (un, vn);
- }
- if (vn == 0)
- {
- mpz_set (r, u);
- return;
- }
-
- uc = u->_mp_size < 0;
- vc = v->_mp_size < 0;
- rc = uc | vc;
-
- ux = -uc;
- vx = -vc;
- rx = -rc;
-
- /* If the smaller input is negative, by sign extension higher limbs
- don't matter. */
- rn = vx ? vn : un;
-
- rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);
-
- up = u->_mp_d;
- vp = v->_mp_d;
-
- i = 0;
- do
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- vl = (vp[i] ^ vx) + vc;
- vc = vl < vc;
-
- rl = ( (ul | vl) ^ rx) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- while (++i < vn);
- assert (vc == 0);
-
- for (; i < rn; i++)
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- rl = ( (ul | vx) ^ rx) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- if (rc)
- rp[rn++] = rc;
- else
- rn = mpn_normalized_size (rp, rn);
-
- r->_mp_size = rx ? -rn : rn;
-}
-
-void
-mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mp_size_t un, vn, i;
- mp_ptr up, vp, rp;
-
- mp_limb_t ux, vx, rx;
- mp_limb_t uc, vc, rc;
- mp_limb_t ul, vl, rl;
-
- un = GMP_ABS (u->_mp_size);
- vn = GMP_ABS (v->_mp_size);
- if (un < vn)
- {
- MPZ_SRCPTR_SWAP (u, v);
- MP_SIZE_T_SWAP (un, vn);
- }
- if (vn == 0)
- {
- mpz_set (r, u);
- return;
- }
-
- uc = u->_mp_size < 0;
- vc = v->_mp_size < 0;
- rc = uc ^ vc;
-
- ux = -uc;
- vx = -vc;
- rx = -rc;
-
- rp = MPZ_REALLOC (r, un + (mp_size_t) rc);
-
- up = u->_mp_d;
- vp = v->_mp_d;
-
- i = 0;
- do
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- vl = (vp[i] ^ vx) + vc;
- vc = vl < vc;
-
- rl = (ul ^ vl ^ rx) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- while (++i < vn);
- assert (vc == 0);
-
- for (; i < un; i++)
- {
- ul = (up[i] ^ ux) + uc;
- uc = ul < uc;
-
- rl = (ul ^ ux) + rc;
- rc = rl < rc;
- rp[i] = rl;
- }
- if (rc)
- rp[un++] = rc;
- else
- un = mpn_normalized_size (rp, un);
-
- r->_mp_size = rx ? -un : un;
-}
-
-static unsigned
-gmp_popcount_limb (mp_limb_t x)
-{
- unsigned c;
-
- /* Do 16 bits at a time, to avoid limb-sized constants. */
- int LOCAL_SHIFT_BITS = 16;
- for (c = 0; x > 0;)
- {
- unsigned w = x - ((x >> 1) & 0x5555);
- w = ((w >> 2) & 0x3333) + (w & 0x3333);
- w = (w >> 4) + w;
- w = ((w >> 8) & 0x000f) + (w & 0x000f);
- c += w;
- if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)
- x >>= LOCAL_SHIFT_BITS;
- else
- x = 0;
- }
- return c;
-}
-
-mp_bitcnt_t
-mpn_popcount (mp_srcptr p, mp_size_t n)
-{
- mp_size_t i;
- mp_bitcnt_t c;
-
- for (c = 0, i = 0; i < n; i++)
- c += gmp_popcount_limb (p[i]);
-
- return c;
-}
-
-mp_bitcnt_t
-mpz_popcount (const mpz_t u)
-{
- mp_size_t un;
-
- un = u->_mp_size;
-
- if (un < 0)
- return ~(mp_bitcnt_t) 0;
-
- return mpn_popcount (u->_mp_d, un);
-}
-
-mp_bitcnt_t
-mpz_hamdist (const mpz_t u, const mpz_t v)
-{
- mp_size_t un, vn, i;
- mp_limb_t uc, vc, ul, vl, comp;
- mp_srcptr up, vp;
- mp_bitcnt_t c;
-
- un = u->_mp_size;
- vn = v->_mp_size;
-
- if ( (un ^ vn) < 0)
- return ~(mp_bitcnt_t) 0;
-
- comp = - (uc = vc = (un < 0));
- if (uc)
- {
- assert (vn < 0);
- un = -un;
- vn = -vn;
- }
-
- up = u->_mp_d;
- vp = v->_mp_d;
-
- if (un < vn)
- MPN_SRCPTR_SWAP (up, un, vp, vn);
-
- for (i = 0, c = 0; i < vn; i++)
- {
- ul = (up[i] ^ comp) + uc;
- uc = ul < uc;
-
- vl = (vp[i] ^ comp) + vc;
- vc = vl < vc;
-
- c += gmp_popcount_limb (ul ^ vl);
- }
- assert (vc == 0);
-
- for (; i < un; i++)
- {
- ul = (up[i] ^ comp) + uc;
- uc = ul < uc;
-
- c += gmp_popcount_limb (ul ^ comp);
- }
-
- return c;
-}
-
-mp_bitcnt_t
-mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
-{
- mp_ptr up;
- mp_size_t us, un, i;
- mp_limb_t limb, ux;
-
- us = u->_mp_size;
- un = GMP_ABS (us);
- i = starting_bit / GMP_LIMB_BITS;
-
- /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
- for u<0. Notice this test picks up any u==0 too. */
- if (i >= un)
- return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
-
- up = u->_mp_d;
- ux = 0;
- limb = up[i];
-
- if (starting_bit != 0)
- {
- if (us < 0)
- {
- ux = mpn_zero_p (up, i);
- limb = ~ limb + ux;
- ux = - (mp_limb_t) (limb >= ux);
- }
-
- /* Mask to 0 all bits before starting_bit, thus ignoring them. */
- limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
- }
-
- return mpn_common_scan (limb, i, up, un, ux);
-}
-
-mp_bitcnt_t
-mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
-{
- mp_ptr up;
- mp_size_t us, un, i;
- mp_limb_t limb, ux;
-
- us = u->_mp_size;
- ux = - (mp_limb_t) (us >= 0);
- un = GMP_ABS (us);
- i = starting_bit / GMP_LIMB_BITS;
-
- /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
- u<0. Notice this test picks up all cases of u==0 too. */
- if (i >= un)
- return (ux ? starting_bit : ~(mp_bitcnt_t) 0);
-
- up = u->_mp_d;
- limb = up[i] ^ ux;
-
- if (ux == 0)
- limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */
-
- /* Mask all bits before starting_bit, thus ignoring them. */
- limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
-
- return mpn_common_scan (limb, i, up, un, ux);
-}
-
-
-/* MPZ base conversion. */
-
-size_t
-mpz_sizeinbase (const mpz_t u, int base)
-{
- mp_size_t un, tn;
- mp_srcptr up;
- mp_ptr tp;
- mp_bitcnt_t bits;
- struct gmp_div_inverse bi;
- size_t ndigits;
-
- assert (base >= 2);
- assert (base <= 62);
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- return 1;
-
- up = u->_mp_d;
-
- bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
- switch (base)
- {
- case 2:
- return bits;
- case 4:
- return (bits + 1) / 2;
- case 8:
- return (bits + 2) / 3;
- case 16:
- return (bits + 3) / 4;
- case 32:
- return (bits + 4) / 5;
- /* FIXME: Do something more clever for the common case of base
- 10. */
- }
-
- tp = gmp_alloc_limbs (un);
- mpn_copyi (tp, up, un);
- mpn_div_qr_1_invert (&bi, base);
-
- tn = un;
- ndigits = 0;
- do
- {
- ndigits++;
- mpn_div_qr_1_preinv (tp, tp, tn, &bi);
- tn -= (tp[tn-1] == 0);
- }
- while (tn > 0);
-
- gmp_free_limbs (tp, un);
- return ndigits;
-}
-
-char *
-mpz_get_str (char *sp, int base, const mpz_t u)
-{
- unsigned bits;
- const char *digits;
- mp_size_t un;
- size_t i, sn, osn;
-
- digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
- if (base > 1)
- {
- if (base <= 36)
- digits = "0123456789abcdefghijklmnopqrstuvwxyz";
- else if (base > 62)
- return NULL;
- }
- else if (base >= -1)
- base = 10;
- else
- {
- base = -base;
- if (base > 36)
- return NULL;
- }
-
- sn = 1 + mpz_sizeinbase (u, base);
- if (!sp)
- {
- osn = 1 + sn;
- sp = (char *) gmp_alloc (osn);
- }
- else
- osn = 0;
- un = GMP_ABS (u->_mp_size);
-
- if (un == 0)
- {
- sp[0] = '0';
- sn = 1;
- goto ret;
- }
-
- i = 0;
-
- if (u->_mp_size < 0)
- sp[i++] = '-';
-
- bits = mpn_base_power_of_two_p (base);
-
- if (bits)
- /* Not modified in this case. */
- sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un);
- else
- {
- struct mpn_base_info info;
- mp_ptr tp;
-
- mpn_get_base_info (&info, base);
- tp = gmp_alloc_limbs (un);
- mpn_copyi (tp, u->_mp_d, un);
-
- sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un);
- gmp_free_limbs (tp, un);
- }
-
- for (; i < sn; i++)
- sp[i] = digits[(unsigned char) sp[i]];
-
-ret:
- sp[sn] = '\0';
- if (osn && osn != sn + 1)
- sp = (char*) gmp_realloc (sp, osn, sn + 1);
- return sp;
-}
-
-int
-mpz_set_str (mpz_t r, const char *sp, int base)
-{
- unsigned bits, value_of_a;
- mp_size_t rn, alloc;
- mp_ptr rp;
- size_t dn, sn;
- int sign;
- unsigned char *dp;
-
- assert (base == 0 || (base >= 2 && base <= 62));
-
- while (isspace( (unsigned char) *sp))
- sp++;
-
- sign = (*sp == '-');
- sp += sign;
-
- if (base == 0)
- {
- if (sp[0] == '0')
- {
- if (sp[1] == 'x' || sp[1] == 'X')
- {
- base = 16;
- sp += 2;
- }
- else if (sp[1] == 'b' || sp[1] == 'B')
- {
- base = 2;
- sp += 2;
- }
- else
- base = 8;
- }
- else
- base = 10;
- }
-
- if (!*sp)
- {
- r->_mp_size = 0;
- return -1;
- }
- sn = strlen(sp);
- dp = (unsigned char *) gmp_alloc (sn);
-
- value_of_a = (base > 36) ? 36 : 10;
- for (dn = 0; *sp; sp++)
- {
- unsigned digit;
-
- if (isspace ((unsigned char) *sp))
- continue;
- else if (*sp >= '0' && *sp <= '9')
- digit = *sp - '0';
- else if (*sp >= 'a' && *sp <= 'z')
- digit = *sp - 'a' + value_of_a;
- else if (*sp >= 'A' && *sp <= 'Z')
- digit = *sp - 'A' + 10;
- else
- digit = base; /* fail */
-
- if (digit >= (unsigned) base)
- {
- gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
-
- dp[dn++] = digit;
- }
-
- if (!dn)
- {
- gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
- bits = mpn_base_power_of_two_p (base);
-
- if (bits > 0)
- {
- alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_bits (rp, dp, dn, bits);
- }
- else
- {
- struct mpn_base_info info;
- mpn_get_base_info (&info, base);
- alloc = (dn + info.exp - 1) / info.exp;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_other (rp, dp, dn, base, &info);
- /* Normalization, needed for all-zero input. */
- assert (rn > 0);
- rn -= rp[rn-1] == 0;
- }
- assert (rn <= alloc);
- gmp_free (dp, sn);
-
- r->_mp_size = sign ? - rn : rn;
-
- return 0;
-}
-
-int
-mpz_init_set_str (mpz_t r, const char *sp, int base)
-{
- mpz_init (r);
- return mpz_set_str (r, sp, base);
-}
-
-size_t
-mpz_out_str (FILE *stream, int base, const mpz_t x)
-{
- char *str;
- size_t len, n;
-
- str = mpz_get_str (NULL, base, x);
- if (!str)
- return 0;
- len = strlen (str);
- n = fwrite (str, 1, len, stream);
- gmp_free (str, len + 1);
- return n;
-}
-
-
-static int
-gmp_detect_endian (void)
-{
- static const int i = 2;
- const unsigned char *p = (const unsigned char *) &i;
- return 1 - *p;
-}
-
-/* Import and export. Does not support nails. */
-void
-mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
- size_t nails, const void *src)
-{
- const unsigned char *p;
- ptrdiff_t word_step;
- mp_ptr rp;
- mp_size_t rn;
-
- /* The current (partial) limb. */
- mp_limb_t limb;
- /* The number of bytes already copied to this limb (starting from
- the low end). */
- size_t bytes;
- /* The index where the limb should be stored, when completed. */
- mp_size_t i;
-
- if (nails != 0)
- gmp_die ("mpz_import: Nails not supported.");
-
- assert (order == 1 || order == -1);
- assert (endian >= -1 && endian <= 1);
-
- if (endian == 0)
- endian = gmp_detect_endian ();
-
- p = (unsigned char *) src;
-
- word_step = (order != endian) ? 2 * size : 0;
-
- /* Process bytes from the least significant end, so point p at the
- least significant word. */
- if (order == 1)
- {
- p += size * (count - 1);
- word_step = - word_step;
- }
-
- /* And at least significant byte of that word. */
- if (endian == 1)
- p += (size - 1);
-
- rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
- rp = MPZ_REALLOC (r, rn);
-
- for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
- {
- size_t j;
- for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
- {
- limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
- if (bytes == sizeof(mp_limb_t))
- {
- rp[i++] = limb;
- bytes = 0;
- limb = 0;
- }
- }
- }
- assert (i + (bytes > 0) == rn);
- if (limb != 0)
- rp[i++] = limb;
- else
- i = mpn_normalized_size (rp, i);
-
- r->_mp_size = i;
-}
-
-void *
-mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
- size_t nails, const mpz_t u)
-{
- size_t count;
- mp_size_t un;
-
- if (nails != 0)
- gmp_die ("mpz_export: Nails not supported.");
-
- assert (order == 1 || order == -1);
- assert (endian >= -1 && endian <= 1);
- assert (size > 0 || u->_mp_size == 0);
-
- un = u->_mp_size;
- count = 0;
- if (un != 0)
- {
- size_t k;
- unsigned char *p;
- ptrdiff_t word_step;
- /* The current (partial) limb. */
- mp_limb_t limb;
- /* The number of bytes left to do in this limb. */
- size_t bytes;
- /* The index where the limb was read. */
- mp_size_t i;
-
- un = GMP_ABS (un);
-
- /* Count bytes in top limb. */
- limb = u->_mp_d[un-1];
- assert (limb != 0);
-
- k = (GMP_LIMB_BITS <= CHAR_BIT);
- if (!k)
- {
- do {
- int LOCAL_CHAR_BIT = CHAR_BIT;
- k++; limb >>= LOCAL_CHAR_BIT;
- } while (limb != 0);
- }
- /* else limb = 0; */
-
- count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;
-
- if (!r)
- r = gmp_alloc (count * size);
-
- if (endian == 0)
- endian = gmp_detect_endian ();
-
- p = (unsigned char *) r;
-
- word_step = (order != endian) ? 2 * size : 0;
-
- /* Process bytes from the least significant end, so point p at the
- least significant word. */
- if (order == 1)
- {
- p += size * (count - 1);
- word_step = - word_step;
- }
-
- /* And at least significant byte of that word. */
- if (endian == 1)
- p += (size - 1);
-
- for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
- {
- size_t j;
- for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian)
- {
- if (sizeof (mp_limb_t) == 1)
- {
- if (i < un)
- *p = u->_mp_d[i++];
- else
- *p = 0;
- }
- else
- {
- int LOCAL_CHAR_BIT = CHAR_BIT;
- if (bytes == 0)
- {
- if (i < un)
- limb = u->_mp_d[i++];
- bytes = sizeof (mp_limb_t);
- }
- *p = limb;
- limb >>= LOCAL_CHAR_BIT;
- bytes--;
- }
- }
- }
- assert (i == un);
- assert (k == count);
- }
-
- if (countp)
- *countp = count;
-
- return r;
-}
diff --git a/source/gmp.h b/source/gmp.h
@@ -1,310 +0,0 @@
-/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
-
-Copyright 2011-2015, 2017, 2019-2021 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/* About mini-gmp: This is a minimal implementation of a subset of the
- GMP interface. It is intended for inclusion into applications which
- have modest bignums needs, as a fallback when the real GMP library
- is not installed.
-
- This file defines the public interface. */
-
-#ifndef __MINI_GMP_H__
-#define __MINI_GMP_H__
-
-/* For size_t */
-#include <stddef.h>
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-void mp_set_memory_functions (void *(*) (size_t),
- void *(*) (void *, size_t, size_t),
- void (*) (void *, size_t));
-
-void mp_get_memory_functions (void *(**) (size_t),
- void *(**) (void *, size_t, size_t),
- void (**) (void *, size_t));
-
-#ifndef MINI_GMP_LIMB_TYPE
-#define MINI_GMP_LIMB_TYPE long
-#endif
-
-typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t;
-typedef long mp_size_t;
-typedef unsigned long mp_bitcnt_t;
-
-typedef mp_limb_t *mp_ptr;
-typedef const mp_limb_t *mp_srcptr;
-
-typedef struct
-{
- int _mp_alloc; /* Number of *limbs* allocated and pointed
- to by the _mp_d field. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- mp_limb_t *_mp_d; /* Pointer to the limbs. */
-} __mpz_struct;
-
-typedef __mpz_struct mpz_t[1];
-
-typedef __mpz_struct *mpz_ptr;
-typedef const __mpz_struct *mpz_srcptr;
-
-extern const int mp_bits_per_limb;
-
-void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
-void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
-void mpn_zero (mp_ptr, mp_size_t);
-
-int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
-int mpn_zero_p (mp_srcptr, mp_size_t);
-
-mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
-mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
-
-mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
-mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
-
-mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
-
-mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
-void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
-void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
-int mpn_perfect_square_p (mp_srcptr, mp_size_t);
-mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
-
-mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
-mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
-
-mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t);
-mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t);
-
-void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
-mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
-
-mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t);
-
-mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
-#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
-
-size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
-mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
-
-void mpz_init (mpz_t);
-void mpz_init2 (mpz_t, mp_bitcnt_t);
-void mpz_clear (mpz_t);
-
-#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
-#define mpz_even_p(z) (! mpz_odd_p (z))
-
-int mpz_sgn (const mpz_t);
-int mpz_cmp_si (const mpz_t, long);
-int mpz_cmp_ui (const mpz_t, unsigned long);
-int mpz_cmp (const mpz_t, const mpz_t);
-int mpz_cmpabs_ui (const mpz_t, unsigned long);
-int mpz_cmpabs (const mpz_t, const mpz_t);
-int mpz_cmp_d (const mpz_t, double);
-int mpz_cmpabs_d (const mpz_t, double);
-
-void mpz_abs (mpz_t, const mpz_t);
-void mpz_neg (mpz_t, const mpz_t);
-void mpz_swap (mpz_t, mpz_t);
-
-void mpz_add_ui (mpz_t, const mpz_t, unsigned long);
-void mpz_add (mpz_t, const mpz_t, const mpz_t);
-void mpz_sub_ui (mpz_t, const mpz_t, unsigned long);
-void mpz_ui_sub (mpz_t, unsigned long, const mpz_t);
-void mpz_sub (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_mul_si (mpz_t, const mpz_t, long int);
-void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int);
-void mpz_mul (mpz_t, const mpz_t, const mpz_t);
-void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_addmul_ui (mpz_t, const mpz_t, unsigned long int);
-void mpz_addmul (mpz_t, const mpz_t, const mpz_t);
-void mpz_submul_ui (mpz_t, const mpz_t, unsigned long int);
-void mpz_submul (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
-void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
-void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
-void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t);
-void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t);
-void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t);
-void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t);
-void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t);
-void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_fdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-
-void mpz_mod (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_divexact (mpz_t, const mpz_t, const mpz_t);
-
-int mpz_divisible_p (const mpz_t, const mpz_t);
-int mpz_congruent_p (const mpz_t, const mpz_t, const mpz_t);
-
-unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long);
-unsigned long mpz_cdiv_ui (const mpz_t, unsigned long);
-unsigned long mpz_fdiv_ui (const mpz_t, unsigned long);
-unsigned long mpz_tdiv_ui (const mpz_t, unsigned long);
-
-unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long);
-
-void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long);
-
-int mpz_divisible_ui_p (const mpz_t, unsigned long);
-
-unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long);
-void mpz_gcd (mpz_t, const mpz_t, const mpz_t);
-void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
-void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long);
-void mpz_lcm (mpz_t, const mpz_t, const mpz_t);
-int mpz_invert (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t);
-void mpz_sqrt (mpz_t, const mpz_t);
-int mpz_perfect_square_p (const mpz_t);
-
-void mpz_pow_ui (mpz_t, const mpz_t, unsigned long);
-void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long);
-void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
-void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t);
-
-void mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long);
-int mpz_root (mpz_t, const mpz_t, unsigned long);
-
-void mpz_fac_ui (mpz_t, unsigned long);
-void mpz_2fac_ui (mpz_t, unsigned long);
-void mpz_mfac_uiui (mpz_t, unsigned long, unsigned long);
-void mpz_bin_uiui (mpz_t, unsigned long, unsigned long);
-
-int mpz_probab_prime_p (const mpz_t, int);
-
-int mpz_tstbit (const mpz_t, mp_bitcnt_t);
-void mpz_setbit (mpz_t, mp_bitcnt_t);
-void mpz_clrbit (mpz_t, mp_bitcnt_t);
-void mpz_combit (mpz_t, mp_bitcnt_t);
-
-void mpz_com (mpz_t, const mpz_t);
-void mpz_and (mpz_t, const mpz_t, const mpz_t);
-void mpz_ior (mpz_t, const mpz_t, const mpz_t);
-void mpz_xor (mpz_t, const mpz_t, const mpz_t);
-
-mp_bitcnt_t mpz_popcount (const mpz_t);
-mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t);
-mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t);
-mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t);
-
-int mpz_fits_slong_p (const mpz_t);
-int mpz_fits_ulong_p (const mpz_t);
-int mpz_fits_sint_p (const mpz_t);
-int mpz_fits_uint_p (const mpz_t);
-int mpz_fits_sshort_p (const mpz_t);
-int mpz_fits_ushort_p (const mpz_t);
-long int mpz_get_si (const mpz_t);
-unsigned long int mpz_get_ui (const mpz_t);
-double mpz_get_d (const mpz_t);
-size_t mpz_size (const mpz_t);
-mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t);
-
-void mpz_realloc2 (mpz_t, mp_bitcnt_t);
-mp_srcptr mpz_limbs_read (mpz_srcptr);
-mp_ptr mpz_limbs_modify (mpz_t, mp_size_t);
-mp_ptr mpz_limbs_write (mpz_t, mp_size_t);
-void mpz_limbs_finish (mpz_t, mp_size_t);
-mpz_srcptr mpz_roinit_n (mpz_t, mp_srcptr, mp_size_t);
-
-#define MPZ_ROINIT_N(xp, xs) {{0, (xs),(xp) }}
-
-void mpz_set_si (mpz_t, signed long int);
-void mpz_set_ui (mpz_t, unsigned long int);
-void mpz_set (mpz_t, const mpz_t);
-void mpz_set_d (mpz_t, double);
-
-void mpz_init_set_si (mpz_t, signed long int);
-void mpz_init_set_ui (mpz_t, unsigned long int);
-void mpz_init_set (mpz_t, const mpz_t);
-void mpz_init_set_d (mpz_t, double);
-
-size_t mpz_sizeinbase (const mpz_t, int);
-char *mpz_get_str (char *, int, const mpz_t);
-int mpz_set_str (mpz_t, const char *, int);
-int mpz_init_set_str (mpz_t, const char *, int);
-
-/* This long list taken from gmp.h. */
-/* For reference, "defined(EOF)" cannot be used here. In g++ 2.95.4,
- <iostream> defines EOF but not FILE. */
-#if defined (FILE) \
- || defined (H_STDIO) \
- || defined (_H_STDIO) /* AIX */ \
- || defined (_STDIO_H) /* glibc, Sun, SCO */ \
- || defined (_STDIO_H_) /* BSD, OSF */ \
- || defined (__STDIO_H) /* Borland */ \
- || defined (__STDIO_H__) /* IRIX */ \
- || defined (_STDIO_INCLUDED) /* HPUX */ \
- || defined (__dj_include_stdio_h_) /* DJGPP */ \
- || defined (_FILE_DEFINED) /* Microsoft */ \
- || defined (__STDIO__) /* Apple MPW MrC */ \
- || defined (_MSL_STDIO_H) /* Metrowerks */ \
- || defined (_STDIO_H_INCLUDED) /* QNX4 */ \
- || defined (_ISO_STDIO_ISO_H) /* Sun C++ */ \
- || defined (__STDIO_LOADED) /* VMS */ \
- || defined (_STDIO) /* HPE NonStop */ \
- || defined (__DEFINED_FILE) /* musl */
-size_t mpz_out_str (FILE *, int, const mpz_t);
-#endif
-
-void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *);
-void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t);
-
-#if defined (__cplusplus)
-}
-#endif
-#endif /* __MINI_GMP_H__ */
diff --git a/source/gpu-verify b/source/gpu-verify
Binary files differ.
diff --git a/source/gpu-verify.dSYM/Contents/Info.plist b/source/gpu-verify.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+ <dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>English</string>
+ <key>CFBundleIdentifier</key>
+ <string>com.apple.xcode.dsym.gpu-verify</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundlePackageType</key>
+ <string>dSYM</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+ </dict>
+</plist>
diff --git a/source/gpu-verify.dSYM/Contents/Resources/DWARF/gpu-verify b/source/gpu-verify.dSYM/Contents/Resources/DWARF/gpu-verify
Binary files differ.
diff --git a/source/gpu-verify.dSYM/Contents/Resources/Relocations/x86_64/gpu-verify.yml b/source/gpu-verify.dSYM/Contents/Resources/Relocations/x86_64/gpu-verify.yml
@@ -0,0 +1,54 @@
+---
+triple: 'x86_64-apple-darwin'
+binary-path: gpu-verify
+relocations:
+ - { offsetInCU: 0x26, offset: 0x26, size: 0x8, addend: 0x0, symName: _main, symObjAddr: 0x0, symBinAddr: 0x100001EA0, symSize: 0x32 }
+ - { offsetInCU: 0x33, offset: 0x33, size: 0x8, addend: 0x0, symName: _main, symObjAddr: 0x0, symBinAddr: 0x100001EA0, symSize: 0x32 }
+ - { offsetInCU: 0x26, offset: 0xA7, size: 0x8, addend: 0x0, symName: _mpSizeof, symObjAddr: 0x0, symBinAddr: 0x100001EE0, symSize: 0x60 }
+ - { offsetInCU: 0x95, offset: 0x116, size: 0x8, addend: 0x0, symName: _mpSizeof, symObjAddr: 0x0, symBinAddr: 0x100001EE0, symSize: 0x60 }
+ - { offsetInCU: 0xCB, offset: 0x14C, size: 0x8, addend: 0x0, symName: _mpSetZero, symObjAddr: 0x60, symBinAddr: 0x100001F40, symSize: 0x50 }
+ - { offsetInCU: 0x11D, offset: 0x19E, size: 0x8, addend: 0x0, symName: _uiceil, symObjAddr: 0xB0, symBinAddr: 0x100001F90, symSize: 0xB0 }
+ - { offsetInCU: 0x153, offset: 0x1D4, size: 0x8, addend: 0x0, symName: _zeroise_bytes, symObjAddr: 0x160, symBinAddr: 0x100002040, symSize: 0x50 }
+ - { offsetInCU: 0x1A5, offset: 0x226, size: 0x8, addend: 0x0, symName: _mpConvFromOctets, symObjAddr: 0x1B0, symBinAddr: 0x100002090, symSize: 0x100 }
+ - { offsetInCU: 0x22F, offset: 0x2B0, size: 0x8, addend: 0x0, symName: _mpConvFromHex, symObjAddr: 0x2B0, symBinAddr: 0x100002190, symSize: 0x2F0 }
+ - { offsetInCU: 0x2D0, offset: 0x351, size: 0x8, addend: 0x0, symName: _opencl_pairs_from_files, symObjAddr: 0x5A0, symBinAddr: 0x100002480, symSize: 0xBF0 }
+ - { offsetInCU: 0x586, offset: 0x607, size: 0x8, addend: 0x0, symName: _opencl_prepare, symObjAddr: 0x1190, symBinAddr: 0x100003070, symSize: 0x740 }
+ - { offsetInCU: 0x673, offset: 0x6F4, size: 0x8, addend: 0x0, symName: _opencl_exec_kernel, symObjAddr: 0x18D0, symBinAddr: 0x1000037B0, symSize: 0xC0 }
+ - { offsetInCU: 0x6CA, offset: 0x74B, size: 0x8, addend: 0x0, symName: _opencl_results, symObjAddr: 0x1990, symBinAddr: 0x100003870, symSize: 0x1D0 }
+ - { offsetInCU: 0x730, offset: 0x7B1, size: 0x8, addend: 0x0, symName: _opencl_cleanup, symObjAddr: 0x1B60, symBinAddr: 0x100003A40, symSize: 0x50 }
+ - { offsetInCU: 0x756, offset: 0x7D7, size: 0x8, addend: 0x0, symName: _opencl_release, symObjAddr: 0x1BB0, symBinAddr: 0x100003A90, symSize: 0x90 }
+ - { offsetInCU: 0x77C, offset: 0x7FD, size: 0x8, addend: 0x0, symName: _gpuv_init, symObjAddr: 0x1C40, symBinAddr: 0x100003B20, symSize: 0x1D0 }
+ - { offsetInCU: 0x7CF, offset: 0x850, size: 0x8, addend: 0x0, symName: _gpuv_execute, symObjAddr: 0x1E10, symBinAddr: 0x100003CF0, symSize: 0xD0 }
+ - { offsetInCU: 0x89B, offset: 0x91C, size: 0x8, addend: 0x0, symName: _gpuv_finish, symObjAddr: 0x1EE0, symBinAddr: 0x100003DC0, symSize: 0x70 }
+ - { offsetInCU: 0x8E3, offset: 0x964, size: 0x8, addend: 0x0, symName: _gpuv_test, symObjAddr: 0x1F50, symBinAddr: 0x100003E30, symSize: 0x2E8 }
+ - { offsetInCU: 0x26, offset: 0xEF4, size: 0x8, addend: 0x0, symName: _ref_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004120, symSize: 0x570 }
+ - { offsetInCU: 0x50, offset: 0xF1E, size: 0x8, addend: 0x0, symName: _ref_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004120, symSize: 0x570 }
+ - { offsetInCU: 0x265, offset: 0x1133, size: 0x8, addend: 0x0, symName: _sexp_from_string, symObjAddr: 0x570, symBinAddr: 0x100004690, symSize: 0x70 }
+ - { offsetInCU: 0x2C5, offset: 0x1193, size: 0x8, addend: 0x0, symName: _sexp_from_string_key, symObjAddr: 0x5E0, symBinAddr: 0x100004700, symSize: 0xB0 }
+ - { offsetInCU: 0x341, offset: 0x120F, size: 0x8, addend: 0x0, symName: _gpuv_test_ref, symObjAddr: 0x690, symBinAddr: 0x1000047B0, symSize: 0x59C }
+ - { offsetInCU: 0x26, offset: 0x1626, size: 0x8, addend: 0x0, symName: _montmodmult_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004D50, symSize: 0x630 }
+ - { offsetInCU: 0x4D, offset: 0x164D, size: 0x8, addend: 0x0, symName: _len_in_bytes, symObjAddr: 0x5DB8, symBinAddr: 0x10000C050, symSize: 0x0 }
+ - { offsetInCU: 0x6C, offset: 0x166C, size: 0x8, addend: 0x0, symName: _montmodmult_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004D50, symSize: 0x630 }
+ - { offsetInCU: 0x279, offset: 0x1879, size: 0x8, addend: 0x0, symName: _convert_thread, symObjAddr: 0x630, symBinAddr: 0x100005380, symSize: 0x4B0 }
+ - { offsetInCU: 0x3E7, offset: 0x19E7, size: 0x8, addend: 0x0, symName: _modmult_opencl_convert, symObjAddr: 0xAE0, symBinAddr: 0x100005830, symSize: 0x490 }
+ - { offsetInCU: 0x5C2, offset: 0x1BC2, size: 0x8, addend: 0x0, symName: _modmult_opencl_prepare, symObjAddr: 0xF70, symBinAddr: 0x100005CC0, symSize: 0x600 }
+ - { offsetInCU: 0x6C1, offset: 0x1CC1, size: 0x8, addend: 0x0, symName: _modmult_opencl_exec_kernel, symObjAddr: 0x1570, symBinAddr: 0x1000062C0, symSize: 0xC0 }
+ - { offsetInCU: 0x718, offset: 0x1D18, size: 0x8, addend: 0x0, symName: _modmult_opencl_results, symObjAddr: 0x1630, symBinAddr: 0x100006380, symSize: 0x230 }
+ - { offsetInCU: 0x7F8, offset: 0x1DF8, size: 0x8, addend: 0x0, symName: _modmult_opencl_cleanup, symObjAddr: 0x1860, symBinAddr: 0x1000065B0, symSize: 0x50 }
+ - { offsetInCU: 0x81E, offset: 0x1E1E, size: 0x8, addend: 0x0, symName: _modmult_opencl_release, symObjAddr: 0x18B0, symBinAddr: 0x100006600, symSize: 0x70 }
+ - { offsetInCU: 0x844, offset: 0x1E44, size: 0x8, addend: 0x0, symName: _gpuv_init_montg, symObjAddr: 0x1920, symBinAddr: 0x100006670, symSize: 0x1F0 }
+ - { offsetInCU: 0x897, offset: 0x1E97, size: 0x8, addend: 0x0, symName: _gpuv_execute_montg, symObjAddr: 0x1B10, symBinAddr: 0x100006860, symSize: 0xF0 }
+ - { offsetInCU: 0x963, offset: 0x1F63, size: 0x8, addend: 0x0, symName: _gpuv_finish_montg, symObjAddr: 0x1C00, symBinAddr: 0x100006950, symSize: 0x90 }
+ - { offsetInCU: 0x9F6, offset: 0x1FF6, size: 0x8, addend: 0x0, symName: _gpuv_test_montg, symObjAddr: 0x1C90, symBinAddr: 0x1000069E0, symSize: 0x2DE }
+ - { offsetInCU: 0x26, offset: 0x26C2, size: 0x8, addend: 0x0, symName: _gpuv_estimate_pairs, symObjAddr: 0x0, symBinAddr: 0x100006CC0, symSize: 0x100 }
+ - { offsetInCU: 0x41, offset: 0x26DD, size: 0x8, addend: 0x0, symName: _select_platform, symObjAddr: 0x190, symBinAddr: 0x100006E50, symSize: 0x2B0 }
+ - { offsetInCU: 0x66, offset: 0x2702, size: 0x8, addend: 0x0, symName: _select_platform.param, symObjAddr: 0xD50, symBinAddr: 0x10000C000, symSize: 0x0 }
+ - { offsetInCU: 0x1BE, offset: 0x285A, size: 0x8, addend: 0x0, symName: _gpuv_estimate_pairs, symObjAddr: 0x0, symBinAddr: 0x100006CC0, symSize: 0x100 }
+ - { offsetInCU: 0x223, offset: 0x28BF, size: 0x8, addend: 0x0, symName: _gpuv_prepare_gcry, symObjAddr: 0x100, symBinAddr: 0x100006DC0, symSize: 0x90 }
+ - { offsetInCU: 0x238, offset: 0x28D4, size: 0x8, addend: 0x0, symName: _select_device, symObjAddr: 0x440, symBinAddr: 0x100007100, symSize: 0x180 }
+ - { offsetInCU: 0x2AC, offset: 0x2948, size: 0x8, addend: 0x0, symName: _logger, symObjAddr: 0x5C0, symBinAddr: 0x100007280, symSize: 0x40 }
+ - { offsetInCU: 0x2FA, offset: 0x2996, size: 0x8, addend: 0x0, symName: _create_compute_context, symObjAddr: 0x600, symBinAddr: 0x1000072C0, symSize: 0x70 }
+ - { offsetInCU: 0x33E, offset: 0x29DA, size: 0x8, addend: 0x0, symName: _create_command_queue, symObjAddr: 0x670, symBinAddr: 0x100007330, symSize: 0x70 }
+ - { offsetInCU: 0x390, offset: 0x2A2C, size: 0x8, addend: 0x0, symName: _compile_program, symObjAddr: 0x6E0, symBinAddr: 0x1000073A0, symSize: 0x320 }
+ - { offsetInCU: 0x46E, offset: 0x2B0A, size: 0x8, addend: 0x0, symName: _create_kernel, symObjAddr: 0xA00, symBinAddr: 0x1000076C0, symSize: 0x63 }
+...
diff --git a/source/gpuv-montg.c b/source/gpuv-montg.c
@@ -0,0 +1,657 @@
+//
+// gpuv-montg.c
+// lib-gpu-verify
+//
+// Created by Cedric Zwahlen on 16.12.2023.
+//
+
+#include "gpuv-montg.h"
+#include <pthread.h>
+
+static unsigned long len_in_bytes = 0; // byte size of one operand buffer; set in gpuv_test_montg, read in modmult_opencl_prepare
+// ORDER and END are passed as the `order` and `endian` arguments of mpz_import / mpz_export below.
+#define ORDER -1 // order == -1: least significant word first
+#define END 0 // endian == 0: native byte order inside each word
+// Operand width in bits; every exported number occupies BIT_LENGTH / 8 bytes in its buffer.
+#define BIT_LENGTH (2048)
+// NOTE(review): BITS is not referenced anywhere in the visible part of this file.
+#define BITS 64
+
+// Reads the text file at `path` into a freshly allocated, NUL-terminated buffer
+// (strtok_r needs the terminator). Aborts if the file cannot be opened or loaded.
+static char *montg_read_text_file(const char *path) {
+    FILE *f = fopen(path, "r");
+    if (f == NULL) {
+        printf("Auxiliary files not found.");
+        abort();
+    }
+
+    fseek(f, 0, SEEK_END);
+    long size = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    char *text = (size < 0) ? NULL : malloc((size_t)size + 1);
+    if (text == NULL) {
+        printf("Could not load %s\n", path);
+        abort();
+    }
+    size_t got = fread(text, 1, (size_t)size, f);
+    text[got] = '\0'; // terminate at what was actually read
+    fclose(f);
+    return text;
+}
+
+// Loads the public keys and message/signature pairs from lib-gpu-generate/ and
+// exports them as arrays of gpu_register words (least significant word first).
+//
+// publickey.txt is read as repeating (modulus, exponent, offset) lines and
+// msgsig.txt as repeating (message, signature) lines; all but the offset are hex.
+//
+// n_buf receives modulus k at word k * len, exp_buf receives exponent k at word k,
+// msg_buf / s_buf receive message / signature i at word i * len, pks[k] receives
+// the offset of key k (used as the index of the last pair that belongs to key k)
+// and *n receives the number of pairs loaded. x_buf, m_buf, r_1_buf and ni_buf are
+// not touched. mpz_export only writes the words a value needs, so the buffers
+// should come in zero-filled; no capacity is passed in, so none is checked.
+void montmodmult_pairs_from_files(void *x_buf, void *m_buf,
+                                  void *r_1_buf,
+                                  void *n_buf, void *ni_buf,
+                                  void *msg_buf,
+                                  void *exp_buf,
+                                  void *s_buf,
+                                  unsigned long *pks, unsigned long *n) {
+
+    char *ms_text = montg_read_text_file("lib-gpu-generate/msgsig.txt");
+    char *pk_text = montg_read_text_file("lib-gpu-generate/publickey.txt");
+    char *ms_save = NULL; // strtok_r cursors, pointing into the buffers above
+    char *pk_save = NULL;
+
+    gpu_register *n_buf_t = n_buf;
+    gpu_register *msg_buf_t = msg_buf;
+    gpu_register *s_buf_t = s_buf;
+    gpu_register *exp_buf_t = exp_buf;
+
+    int len = (BIT_LENGTH / 8) / sizeof(gpu_register);
+
+    char *message = strtok_r(ms_text, "\n", &ms_save);
+    char *signature = strtok_r(NULL, "\n", &ms_save);
+    char *modulus = strtok_r(pk_text, "\n", &pk_save);
+    char *exponent = strtok_r(NULL, "\n", &pk_save);
+    char *offs = strtok_r(NULL, "\n", &pk_save);
+
+    unsigned long i = 0; // pair index
+    unsigned long j = 0; // key index
+
+    mpz_t e, mod, msg, s;
+    mpz_init(e);
+    mpz_init(mod);
+    mpz_init(msg);
+    mpz_init(s);
+
+    while (message != NULL && signature != NULL) {
+
+        // fetch the next key once pair i lies past the previous key's last index
+        if (i == 0 || pks[j - 1] < i) {
+            if (modulus == NULL || exponent == NULL || offs == NULL) {
+                printf("Public key file ended before all pairs were matched.\n");
+                break;
+            }
+            mpz_set_str(mod, modulus, 16);
+            mpz_set_str(e, exponent, 16);
+            pks[j] = strtoul(offs, NULL, 10);
+            modulus = strtok_r(NULL, "\n", &pk_save);
+            exponent = strtok_r(NULL, "\n", &pk_save);
+            offs = strtok_r(NULL, "\n", &pk_save);
+            mpz_export(&n_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, mod);
+            mpz_export(&exp_buf_t[j], NULL, ORDER, sizeof(gpu_register), END, 0, e);
+            j++;
+        }
+
+        mpz_set_str(msg, message, 16);
+        mpz_set_str(s, signature, 16);
+
+        message = strtok_r(NULL, "\n", &ms_save);
+        signature = strtok_r(NULL, "\n", &ms_save);
+
+        mpz_export(&msg_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, msg);
+        mpz_export(&s_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, s);
+        i++;
+    }
+
+    mpz_clear(e);
+    mpz_clear(mod);
+    mpz_clear(msg);
+    mpz_clear(s);
+
+    free(ms_text);
+    free(pk_text);
+
+    *n = i;
+}
+
+struct thread_args {
+ // Work description handed to convert_thread; one instance per worker thread.
+ void *x_buf; // out: written at word offset len * i for every signature i of the slice
+ void *m_buf; // out: written at word offset len * i for every signature i of the slice
+ void *r_1_buf; // out: written per key at word offset len * j
+ void *n_buf; // in: moduli, read per key at word offset len * j
+ void *ni_buf; // out: written per key at word offset len * j
+ void *s_buf; // in: signatures, read at word offset len * i
+ unsigned long *pks; // in: per-key offsets used to map a signature index to its key
+ unsigned long n_start; // first signature index of the slice (inclusive)
+ unsigned long n_end; // end of the slice (exclusive)
+};
+
+// Worker for modmult_opencl_convert: prepares the Montgomery inputs for the
+// signatures in [n_start, n_end) of the struct thread_args passed as vargp.
+//
+// With r = 2^BIT_LENGTH it stores, for every key of the slice, the Bezout
+// coefficients of r and the modulus in r_1_buf / ni_buf, and for every
+// signature s the values r mod n in x_buf and s * r mod n in m_buf.
+// Always returns NULL.
+void *convert_thread(void * vargp) {
+
+    struct thread_args *args = vargp;
+
+    int len = (BIT_LENGTH / 8) / sizeof(gpu_register);
+
+    // Find the key of the first signature of the slice. The loop further down
+    // keeps key k for every index i <= pks[k], so a slice that starts exactly
+    // at pks[k] still belongs to key k. The previous test (pks[j] > n_start)
+    // skipped to key k + 1 in that case and converted with the wrong modulus.
+    // NOTE(review): unbounded scan; relies on some pks entry reaching n_start.
+    int j = 0;
+    while (args->pks[j] < args->n_start) {
+        j++;
+    }
+
+    gpu_register *s_buf_t = args->s_buf;
+    gpu_register *n_buf_t = args->n_buf;
+    gpu_register *x_buf_t = args->x_buf;
+    gpu_register *m_buf_t = args->m_buf;
+    gpu_register *r_1_buf_t = args->r_1_buf;
+    gpu_register *ni_buf_t = args->ni_buf;
+
+    mpz_t mod, s;
+    mpz_init(s);
+    mpz_init(mod);
+
+    mpz_t r, r_1, ni, M, x;
+    mpz_init(r);
+    mpz_init(r_1);
+    mpz_init(ni);
+    mpz_init(M);
+    mpz_init(x);
+
+    mpz_t one; // seeds r below, then receives the gcd from mpz_gcdext
+    mpz_init_set_si(one, 1);
+    mpz_mul_2exp(r, one, BIT_LENGTH); // r = 2^BIT_LENGTH
+
+    int start = (int)args->n_start;
+
+    for (int i = start; i < args->n_end; i++) {
+
+        // first signature of the slice, or i lies past the current key's range
+        if (i == start || args->pks[j - 1] < i) {
+
+            mpz_import(mod, len, ORDER, sizeof(gpu_register), END, 0, &n_buf_t[len * j]);
+
+            mpz_gcdext(one, r_1, ni, r, mod); // one = r_1 * r + ni * mod
+
+            // the two coefficients have opposite signs; store both as positive
+            // values such that r * r_1 - mod * ni == 1
+            int sgn = mpz_sgn(r_1);
+
+            mpz_abs(r_1, r_1);
+            mpz_abs(ni, ni);
+
+            if (sgn == -1) {
+                mpz_sub(ni, r, ni);
+                mpz_sub(r_1, mod, r_1);
+            }
+
+            mpz_export(&ni_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, ni);
+            mpz_export(&r_1_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, r_1);
+
+            j++;
+        }
+
+        mpz_import(s, len, ORDER, sizeof(gpu_register), END, 0, &s_buf_t[len * i]);
+
+        // M = s * r mod n
+        mpz_mul(M, s, r);
+        mpz_mod(M, M, mod);
+
+        // x = r mod n
+        mpz_mod(x, r, mod);
+
+        mpz_export(&x_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, x);
+        mpz_export(&m_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, M);
+    }
+
+    mpz_clear(s); // s and mod were never cleared before
+    mpz_clear(mod);
+    mpz_clear(r);
+    mpz_clear(r_1);
+    mpz_clear(ni);
+    mpz_clear(M);
+    mpz_clear(x);
+
+    mpz_clear(one);
+
+    return NULL;
+
+}
+
+// Fills x_buf, m_buf, r_1_buf and ni_buf from the moduli in n_buf and the signatures
+// in s_buf by running convert_thread on contiguous slices of the n signatures.
+// The elapsed wall-clock time is added to state->p; tv_nsec is kept in MICROseconds
+// there (it is printed with %06ld later). msg_buf and exp_buf are not used.
+void modmult_opencl_convert(struct gpu_state_alt *state, void *x_buf, void *m_buf,
+                            void *r_1_buf,
+                            void *n_buf, void *ni_buf,
+                            void *msg_buf,
+                            void *exp_buf,
+                            void *s_buf,
+                            unsigned long *pks, unsigned long n) {
+
+    long nr_of_threads = 4;
+    #if __APPLE__ || unix
+    nr_of_threads = sysconf(_SC_NPROCESSORS_ONLN);
+    #elif _WIN32
+    SYSTEM_INFO sysinfo;
+    GetSystemInfo(&sysinfo);
+    nr_of_threads = sysinfo.dwNumberOfProcessors;
+    #endif
+
+    // sysconf returns -1 on failure, and the arrays below need a positive length
+    if (nr_of_threads < 1) {
+        nr_of_threads = 1;
+    }
+
+    struct timespec p1, p2;
+    clock_gettime(CLOCK_REALTIME, &p1);
+
+    pthread_t tid[nr_of_threads];
+    int started[nr_of_threads]; // 1 if tid[i] refers to a running thread
+    struct thread_args args[nr_of_threads];
+    unsigned long range = n / nr_of_threads;
+
+    for (long i = 0; i < nr_of_threads; i++) {
+        args[i].n_start = i * range;
+        // the last slice also takes the remainder of the division
+        args[i].n_end = (i == nr_of_threads - 1) ? n : (i + 1) * range;
+        args[i].m_buf = m_buf;
+        args[i].n_buf = n_buf;
+        args[i].ni_buf = ni_buf;
+        args[i].pks = pks;
+        args[i].r_1_buf = r_1_buf;
+        args[i].s_buf = s_buf;
+        args[i].x_buf = x_buf;
+
+        started[i] = (pthread_create(&tid[i], NULL, convert_thread, &args[i]) == 0);
+        if (!started[i]) {
+            // keep the batch complete: convert this slice on the calling thread
+            printf("Error creating threads");
+            convert_thread(&args[i]);
+        }
+    }
+
+    for (long i = 0; i < nr_of_threads; i++) {
+        // joining a pthread_t that was never filled in is undefined
+        if (started[i] && pthread_join(tid[i], NULL) != 0) {
+            printf("Error joining threads");
+        }
+    }
+
+    clock_gettime(CLOCK_REALTIME, &p2);
+    long sec = (long)(p2.tv_sec - p1.tv_sec);
+    long nsec = p2.tv_nsec - p1.tv_nsec;
+    if (nsec < 0) { // borrow one full second (the old code borrowed 999999999 ns)
+        sec -= 1;
+        nsec += 1000000000L;
+    }
+
+    state->p.tv_sec += sec;
+    state->p.tv_nsec += nsec / 1000;
+    // carry whole seconds so the microsecond part always fits the %06ld format
+    state->p.tv_sec += state->p.tv_nsec / 1000000;
+    state->p.tv_nsec %= 1000000;
+}
+
+// Converts the batch on the CPU (modmult_opencl_convert), creates the device
+// buffers, uploads the host data and binds the buffers to kernel arguments 0-6.
+//
+// Operand buffers are created and uploaded with the file-level len_in_bytes size,
+// so the host buffers must be at least that large. r_1_buf and s_buf only feed
+// the conversion step and are not uploaded. Sets state->total to n.
+//
+// Calls exit(1) on any OpenCL failure; otherwise returns 0.
+int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state,
+                           void *x_buf, void *m_buf,
+                           void *r_1_buf,
+                           void *n_buf, void *ni_buf,
+                           void *msg_buf,
+                           void *exp_buf,
+                           void *s_buf,
+                           unsigned long *pks, unsigned long n
+                           ) {
+
+    modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n);
+
+    int err; // error code returned from api calls
+
+    // pk = index of the first pks entry that reaches the last pair (n - 1)
+    // NOTE(review): unbounded scan; relies on pks containing such an entry
+    unsigned long pk = 0;
+    while (pks[pk] + 1 < n) {
+        pk++;
+    }
+
+    unsigned long len = len_in_bytes;
+
+    state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
+    state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
+    state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
+    state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
+
+    state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, n * sizeof(gpu_register), NULL, NULL);
+
+    state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
+
+    // layout: element 0 holds pk, elements 1 .. pk + 1 hold pks[0 .. pk], hence pk + 2 entries
+    state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 2), NULL, NULL);
+
+    // msg_mem and pks_indices used to be left out of this check
+    if (!state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem ||
+        !state->exp_mem || !state->msg_mem || !state->pks_indices)
+    {
+        printf("Error: Failed to allocate device memory!\n");
+        exit(1);
+    }
+
+    // Write our data set into the input arrays in device memory (blocking writes)
+    err = clEnqueueWriteBuffer(info->commands, state->x_mem, CL_TRUE, 0, len, x_buf, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->m_mem, CL_TRUE, 0, len, m_buf, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->n_mem, CL_TRUE, 0, len, n_buf, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->ni_mem, CL_TRUE, 0, len, ni_buf, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0, n * sizeof(gpu_register), exp_buf, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->msg_mem, CL_TRUE, 0, len, msg_buf, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, sizeof(unsigned long), sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long), &pk, 0, NULL, NULL);
+
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+    }
+
+    // Set the arguments to our compute kernel
+    err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->x_mem);
+    err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->m_mem);
+    err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->n_mem);
+    err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->ni_mem);
+    err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->exp_mem);
+    err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->msg_mem);
+    err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->pks_indices);
+
+    if (err != CL_SUCCESS)
+    {
+        printf("RSA-Error: Failed to set kernel arguments! %d\n", err);
+        exit(1);
+    }
+
+    state->total = n;
+
+    return 0;
+}
+
+// Enqueues the kernel with one work-item per signature of the prepared batch
+// (state->total). If state->skip is still set, the flag is cleared and
+// state->t1 is stamped as the start of the GPU timing before the launch.
+// Returns 0 on success, EXIT_FAILURE if the launch could not be enqueued.
+int modmult_opencl_exec_kernel(struct gpu_info *info, struct gpu_state_alt *state) {
+
+    // has to be exactly the amount of signatures we want to verify
+    size_t work_items = state->total;
+
+    // measure from the first call to the kernel...
+    if (state->skip) {
+        state->skip = false;
+        clock_gettime(CLOCK_REALTIME, &state->t1);
+    }
+
+    int status = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL,
+                                        &work_items, NULL, 0, NULL, NULL);
+    if (status == 0) {
+        return 0;
+    }
+
+    printf("Error: Failed to execute kernel!\n");
+    return EXIT_FAILURE;
+}
+
+// Waits for the queue to drain, reads the counter in state->res_mem and returns
+// state->total minus that counter; returns 0 right away while state->skip is set.
+// With timed set, stamps state->t2 and prints the CPU, GPU and total times
+// (state->p.tv_nsec holds microseconds). Calls exit(1) on OpenCL errors.
+unsigned long modmult_opencl_results(struct gpu_info *info, struct gpu_state_alt *state, bool timed, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) {
+
+    if (state->skip) {
+        // nothing has been launched yet; skip is cleared by modmult_opencl_exec_kernel
+        return 0;
+    }
+
+    // Wait for the queued commands to get serviced before reading back results
+    int err = clFinish(info->commands);
+    if (err != CL_SUCCESS) {
+        printf("Error: Kernel failure! %d\n", err);
+        exit(1);
+    }
+
+    // Read back the results from the device to verify the output
+    unsigned long results = 0;
+    err = clEnqueueReadBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL );
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to read output array! %d\n", err);
+        exit(1);
+    }
+
+    if (timed) {
+        // stop measuring after the last command has been read
+        clock_gettime(CLOCK_REALTIME, &state->t2);
+        printf("Preparation (on CPU) took \t%ld.%06ld s\n", state->p.tv_sec, state->p.tv_nsec);
+        long sec = (long)(state->t2.tv_sec - state->t1.tv_sec);
+        long nsec = state->t2.tv_nsec - state->t1.tv_nsec;
+        if (nsec < 0) { // borrow one full second (the old code borrowed 999999999 ns)
+            sec -= 1;
+            nsec += 1000000000L;
+        }
+        long usec = nsec / 1000;
+        printf("GPU verification took \t%ld.%06ld s\n", sec, usec);
+
+        // carry whole seconds: the two microsecond parts can add up to more than one second
+        long total_usec = usec + state->p.tv_nsec;
+        long total_sec = sec + (long)state->p.tv_sec + total_usec / 1000000;
+        printf("Total time: \t\t%ld.%06ld s\n", total_sec, total_usec % 1000000);
+    }
+
+    return state->total - results;
+}
+
+void modmult_opencl_cleanup(struct gpu_info *info) {
+ // Releases the program, kernel, command queue and context stored in info, in that order.
+ clReleaseProgram(info->program);
+ clReleaseKernel(info->kernel);
+ clReleaseCommandQueue(info->commands);
+ clReleaseContext(info->context);
+ // The handles are left in info unchanged; the return values of the release calls are not checked.
+}
+
+void modmult_opencl_release(struct gpu_state_alt *state) {
+ // Releases the seven per-batch device buffers that modmult_opencl_prepare creates.
+ clReleaseMemObject(state->x_mem);
+ clReleaseMemObject(state->m_mem);
+ clReleaseMemObject(state->n_mem);
+ clReleaseMemObject(state->ni_mem);
+ clReleaseMemObject(state->exp_mem);
+
+ clReleaseMemObject(state->msg_mem);
+ // state->res_mem is not released here; it is created separately in gpuv_init_montg.
+ clReleaseMemObject(state->pks_indices);
+}
+
+// MARK: for library
+
+// Sets up OpenCL in info (platform, device, context, queue, the "mont" kernel from
+// gpuv-montg.cl), resets state, and creates the zeroed result counter state->res_mem
+// bound to kernel argument 7. Calls exit(1) if an OpenCL call made here fails.
+void gpuv_init_montg(struct gpu_info *info, struct gpu_state_alt *state) {
+    info->platform = select_platform(0, false);
+    info->device_id = select_device (info->platform);
+    info->context = create_compute_context (info->device_id);
+    info->commands = create_command_queue (info->device_id, info->context);
+    info->program = compile_program (info->device_id, info->context, "gpuv-montg.cl");
+    info->kernel = create_kernel (info->program, "mont");
+
+    state->result = 0;
+    state->total = 0;
+    state->skip = true; // no kernel launched yet
+    state->p.tv_nsec = 0;
+    state->p.tv_sec = 0;
+
+    cl_int err = CL_SUCCESS;
+    unsigned long results = 0;
+    // the creation result used to go unchecked (errcode_ret was NULL)
+    state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long), NULL, &err);
+    if (!state->res_mem || err != CL_SUCCESS) {
+        printf("Error: Failed to allocate device memory!\n");
+        exit(1);
+    }
+    err = clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+    }
+    err = clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->res_mem);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to set kernel arguments! %d\n", err);
+        exit(1);
+    }
+}
+
+// Runs one batch: converts and uploads it (modmult_opencl_prepare), collects the
+// result of the previously launched kernel, launches the kernel for this batch
+// and drops the host's references to the per-batch buffers.
+// NOTE(review): prepare overwrites state->total before the previous batch's result is read.
+void gpuv_execute_montg(struct gpu_info *info, struct gpu_state_alt *state,
+                        void *x_buf, void *m_buf,
+                        void *r_1_buf,
+                        void *n_buf, void *ni_buf,
+                        void *msg_buf,
+                        void *exp_buf,
+                        void *s_buf,
+                        unsigned long *pks, unsigned long n) {
+
+    modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf,
+                           msg_buf, exp_buf, s_buf, pks, n); // blocks until the batch is uploaded
+    // waits for the previous kernel, if there is one
+    state->result += modmult_opencl_results(info, state, false, msg_buf, r_1_buf, n_buf, s_buf, n);
+    // A failed launch leaves the result counter unchanged, so the batch would later
+    // be counted as fully valid - stop instead, like the other OpenCL errors do.
+    if (modmult_opencl_exec_kernel(info, state) != 0) {
+        exit(1);
+    }
+    modmult_opencl_release(state); // release buffers
+}
+
+// Waits for the last batch, prints the timing summary, releases the result buffer and
+// the OpenCL objects in info, and returns the accumulated count (state->result is reset to 0).
+unsigned long gpuv_finish_montg(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) {
+    state->result += modmult_opencl_results(info, state, true, msg_buf, r_1_buf, n_buf, s_buf, n);
+
+    // res_mem is created in gpuv_init_montg and is not covered by modmult_opencl_release
+    clReleaseMemObject(state->res_mem);
+    modmult_opencl_cleanup(info);
+
+    unsigned long res = state->result;
+    state->result = 0; // reset result
+    return res;
+}
+
+
+// Allocates `size` zero-filled bytes (at least one); aborts when out of memory.
+static void *montg_zalloc(unsigned long size) {
+    void *mem = calloc(1, size ? size : 1);
+    if (mem == NULL) {
+        printf("Out of memory.\n");
+        abort();
+    }
+    return mem;
+}
+
+// Self-test: loads the key and signature files, verifies all pairs in a single
+// GPU batch and prints the outcome. Always returns 0.
+int gpuv_test_montg(void) {
+
+    unsigned long pairs = gpuv_estimate_pairs(); // returns an estimation of pairs
+    unsigned long digit_sz = (BIT_LENGTH / 8) * pairs; // one operand per pair
+    len_in_bytes = digit_sz; // upload size used by modmult_opencl_prepare
+
+    // zero-filled because mpz_export only writes the words a value needs
+    gpu_register *x_buf = montg_zalloc(digit_sz);
+    gpu_register *m_buf = montg_zalloc(digit_sz);
+    gpu_register *r_1_buf = montg_zalloc(digit_sz);
+    gpu_register *n_buf = montg_zalloc(digit_sz);
+    gpu_register *ni_buf = montg_zalloc(digit_sz + pairs);
+    gpu_register *msg_buf = montg_zalloc(digit_sz);
+    gpu_register *s_buf = montg_zalloc(digit_sz);
+    gpu_register *exp_buf = montg_zalloc(pairs * sizeof(gpu_register));
+    unsigned long *pks = montg_zalloc(pairs * sizeof(unsigned long));
+
+    printf("READING KEYS...\n");
+
+    // NOTE(review): the loader is not told the capacity; more pairs in the files
+    // than estimated would overrun the buffers.
+    montmodmult_pairs_from_files(x_buf, m_buf,
+                                 r_1_buf,
+                                 n_buf, ni_buf,
+                                 msg_buf,
+                                 exp_buf, s_buf,
+                                 pks, &pairs);
+
+    printf("VERIFYING %lu SIGNATURES...\n", pairs);
+
+    struct gpu_info info;
+    struct gpu_state_alt state;
+    gpuv_init_montg(&info, &state);
+
+    gpuv_execute_montg(&info, &state,
+                       x_buf, m_buf,
+                       r_1_buf,
+                       n_buf, ni_buf,
+                       msg_buf, exp_buf, s_buf,
+                       pks, pairs);
+
+    unsigned long res = gpuv_finish_montg(&info, &state, msg_buf, r_1_buf, n_buf,s_buf, pairs);
+    if (res == pairs) {
+        printf("VERIFICATION RESULT: OK\n\n");
+    } else {
+        printf("VERIFICATION RESULT: NOT OK!\n");
+        printf("At least %lu signatures were invalid.\n\n",state.total - res);
+    }
+
+    free(x_buf);
+    free(m_buf);
+    free(r_1_buf);
+    free(n_buf);
+    free(ni_buf);
+    free(msg_buf);
+    free(s_buf);
+    free(exp_buf);
+    free(pks);
+
+    return 0;
+}
diff --git a/source/gpuv-montg.cl b/source/gpuv-montg.cl
@@ -0,0 +1 @@
+../xcode/gpuv-montg.cl
+\ No newline at end of file
diff --git a/source/gpuv-montg.h b/source/gpuv-montg.h
@@ -0,0 +1,33 @@
+//
+// gpuv-montg.h
+// lib-gpu-verify
+//
+// Created by Cedric Zwahlen on 16.12.2023.
+//
+
+#ifndef gpuv_montg_h
+#define gpuv_montg_h
+
+#include <stdio.h>
+#include <stdint.h>
+#include <gmp.h>
+#include "util.h"
+
+int gpuv_test_montg(void); // self-test: reads the files under lib-gpu-generate/, verifies the pairs on the GPU, prints the result; always returns 0
+// Sets up OpenCL in *info (building the "mont" kernel from gpuv-montg.cl) and resets *state.
+void gpuv_init_montg(struct gpu_info *info, struct gpu_state_alt *state);
+// Converts one batch of n signatures on the CPU, uploads it, collects the result of the previously started kernel and starts the kernel for this batch.
+void gpuv_execute_montg(struct gpu_info *info, struct gpu_state_alt *state,
+ void *x_buf, void *m_buf,
+ void *r_1_buf,
+ void *n_buf, void *ni_buf,
+ void *msg_buf,
+ void *exp_buf,
+ void *s_buf,
+ unsigned long *pks, unsigned long n);
+// Waits for the last kernel, prints the timings, releases the program, kernel, queue and context in *info and returns the accumulated result (state->result is reset to 0).
+unsigned long gpuv_finish_montg(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n);
+// 64-bit word type; the operand buffers passed to the functions above are used as arrays of it.
+typedef uint64_t gpu_register;
+
+#endif /* gpuv_montg_h */
diff --git a/source/gpuv-ref.c b/source/gpuv-ref.c
@@ -0,0 +1,257 @@
+//
+// gpuv-ref.c
+// lib-gpu-verify
+//
+// Created by Cedric Zwahlen on 06.12.2023.
+//
+
+#include "gpuv-ref.h"
+
+void ref_pairs_from_files(char *bases, unsigned long *b_off,
+                          char *exponents, unsigned long *e_off,
+                          char *moduli, unsigned long *m_off,
+                          char *signatures, unsigned long *s_off,
+                          unsigned long *pks,
+                          unsigned long *n) {
+    /* Reads publickey.txt and msgsig.txt and packs their lines, as strings, into the caller's buffers; *n receives the number of message/signature pairs. */
+    FILE *pk;
+    FILE *ms;
+
+    pk = fopen("lib-gpu-generate/publickey.txt", "r");
+    ms = fopen("lib-gpu-generate/msgsig.txt", "r");
+
+    if (pk == NULL || ms == NULL) {
+        printf("Auxiliary files not found.");
+        abort();
+    }
+
+    fseek (ms, 0, SEEK_END);
+    long ms_l = ftell(ms);
+    fseek (ms, 0, SEEK_SET);
+    char *ms_ptr = ms_l < 0 ? NULL : calloc((size_t)ms_l + 1, 1); // zeroed, +1 byte: strtok_r needs a NUL-terminated string
+    char *ms_ptr_rest = NULL; // strtok_r save pointer only; it must not own an allocation
+    if (ms_ptr == NULL) {
+        printf("Auxiliary files could not be loaded.");
+        abort();
+    }
+    fread (ms_ptr, 1, ms_l, ms); // a short read is harmless: the rest of the buffer is already zero
+    fclose (ms);
+
+    fseek (pk, 0, SEEK_END);
+    long pk_l = ftell(pk);
+    fseek (pk, 0, SEEK_SET);
+    char *pk_ptr = pk_l < 0 ? NULL : calloc((size_t)pk_l + 1, 1);
+    char *pk_ptr_rest = NULL;
+    if (pk_ptr == NULL) {
+        printf("Auxiliary files could not be loaded.");
+        abort();
+    }
+    fread (pk_ptr, 1, pk_l, pk);
+    fclose (pk);
+
+    char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest);
+    char* signature = strtok_r(0, "\n", &ms_ptr_rest);
+    char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest);
+    char* exponent = strtok_r(0, "\n", &pk_ptr_rest);
+    char* offs = strtok_r(0, "\n", &pk_ptr_rest);
+
+    unsigned long b_offset = 0;
+    unsigned long e_offset = 0;
+    unsigned long m_offset = 0;
+    unsigned long s_offset = 0;
+
+    int i = 0;
+    int j = 0;
+
+    while (modulus != NULL && exponent != NULL && offs != NULL) {
+        // publickey.txt is consumed three lines at a time: modulus, exponent, then an index stored in pks
+        unsigned long n_buf_len = strlen(modulus);
+        unsigned long e_buf_len = strlen(exponent);
+        // the terminator is not copied: the +1 below skips one byte the caller is expected to have zeroed
+        memcpy(&moduli[m_offset], modulus, n_buf_len);
+        memcpy(&exponents[e_offset], exponent, e_buf_len);
+
+        m_off[i] = m_offset;
+        e_off[i] = e_offset;
+
+        m_offset += n_buf_len + 1;
+        e_offset += e_buf_len + 1;
+
+        pks[i] = strtoul(offs, NULL, 10); // pks is unsigned long; atoi would truncate through int
+
+        modulus = strtok_r(0, "\n", &pk_ptr_rest);
+        exponent = strtok_r(0, "\n", &pk_ptr_rest);
+        offs = strtok_r(0, "\n", &pk_ptr_rest);
+
+        i++;
+    }
+
+    while (message != NULL && signature != NULL) {
+        // msgsig.txt is consumed two lines at a time: message, then signature
+        unsigned long m_buf_len = strlen(message);
+        unsigned long s_buf_len = strlen(signature);
+
+        memcpy(&bases[b_offset], message, m_buf_len);
+        memcpy(&signatures[s_offset], signature, s_buf_len);
+
+        b_off[j] = b_offset;
+        s_off[j] = s_offset;
+
+        b_offset += m_buf_len + 1;
+        s_offset += s_buf_len + 1;
+
+        message = strtok_r(0, "\n",&ms_ptr_rest);
+        signature = strtok_r(0, "\n",&ms_ptr_rest);
+        j++;
+    }
+
+    free(ms_ptr); // every token has been copied out, the file images are no longer needed
+    free(pk_ptr);
+    *n = j;
+}
+
+gcry_sexp_t sexp_from_string(char* str, const char *format) {
+    /* Builds the S-expression `format` (one %m) around the hexadecimal number in str; returns NULL if parsing or building fails. */
+    gcry_sexp_t sexp = NULL;
+    gcry_mpi_t mpi = NULL; // gcry_mpi_scan allocates the MPI itself; a pre-allocated one was simply leaked
+    size_t errOff = 0;
+    if (gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, str, 0, NULL) != 0)
+        return NULL;
+
+    if (gcry_sexp_build(&sexp, &errOff, format, mpi) != 0)
+        sexp = NULL;
+    gcry_mpi_release(mpi); // %m inserted a copy, so the temporary can be released
+    return sexp;
+}
+
+gcry_sexp_t sexp_from_string_key(char* str_1, char* str_2, const char *format) {
+    /* Builds the S-expression `format` (two %m) around the hexadecimal numbers str_1 and str_2; returns NULL on failure. */
+    gcry_sexp_t sexp = NULL;
+    gcry_mpi_t mpi_1 = NULL; // both MPIs are allocated by gcry_mpi_scan
+    gcry_mpi_t mpi_2 = NULL;
+    size_t errOff = 0;
+
+    if (gcry_mpi_scan(&mpi_1, GCRYMPI_FMT_HEX, str_1, 0, NULL) == 0 &&
+        gcry_mpi_scan(&mpi_2, GCRYMPI_FMT_HEX, str_2, 0, NULL) == 0 &&
+        gcry_sexp_build(&sexp,&errOff,format,mpi_1,mpi_2) != 0)
+        sexp = NULL; // only reached when both scans worked but the build failed
+
+    gcry_mpi_release(mpi_1); // releasing NULL is allowed, so no per-branch cleanup is needed
+    gcry_mpi_release(mpi_2);
+
+    return sexp;
+}
+
+int gpuv_test_ref(void) {
+    /* CPU reference run: verifies every signature from the input files with libgcrypt and prints the timing. Returns 1 if all verified, 0 otherwise. */
+    unsigned long pairs = gpuv_estimate_pairs();
+
+    unsigned long str_sz = (2048) * pairs;
+
+    char *b = malloc(str_sz);
+    char *e = malloc(str_sz);
+    char *m = malloc(str_sz);
+    char *s = malloc(str_sz);
+
+    unsigned long *b_off = malloc(str_sz);
+    unsigned long *e_off = malloc(str_sz);
+    unsigned long *m_off = malloc(str_sz);
+    unsigned long *s_off = malloc(str_sz);
+
+    memset(b, 0, str_sz);
+    memset(e, 0, str_sz);
+    memset(m, 0, str_sz);
+    memset(s, 0, str_sz);
+
+    memset(b_off, 0, str_sz);
+    memset(e_off, 0, str_sz);
+    memset(m_off, 0, str_sz);
+    memset(s_off, 0, str_sz);
+
+    unsigned long *pks = malloc(str_sz);
+    memset(pks, 0, str_sz);
+
+    ref_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks,
+                         &pairs);
+
+    // pks[k] is used below as the index of the last signature made with key k, so the key covering signature
+    // pairs-1 is the last key. The scan is bounded by pairs (at most one key per signature) instead of running free.
+    unsigned long pk = 0;
+    while (pk + 1 < pairs && pks[pk] + 1 != pairs)
+        pk++;
+    const unsigned long key_count = pairs ? pk + 1 : 0;
+
+    gcry_sexp_t *m_sexps = malloc(pairs * sizeof(gcry_sexp_t));
+    gcry_sexp_t *s_sexps = malloc(pairs * sizeof(gcry_sexp_t));
+    gcry_sexp_t *key_sexps = malloc(key_count * sizeof(gcry_sexp_t));
+
+    for (unsigned long i = 0; i < pairs; i++) {
+
+        m_sexps[i] = sexp_from_string(&b[b_off[i]], "(data (flags raw) (value %m))"); // message format (for comparison)
+
+        s_sexps[i] = sexp_from_string(&s[s_off[i]], "(sig-val (rsa (s %m)))"); // signature format
+    }
+
+    for (unsigned long k = 0; k < key_count; k++) {
+
+        key_sexps[k] = sexp_from_string_key(&m[m_off[k]], &e[e_off[k]], "(public-key (rsa (n %m) (e %m)))" ); // pub key data
+
+    }
+
+    unsigned long result = 0;
+
+    struct timespec t1, t2;
+
+    printf("VERIFYING %lu SIGNATURES...\n", pairs);
+
+    clock_gettime(CLOCK_REALTIME, &t1);
+
+    pk = 0; // reuse pk
+
+    for (unsigned long i = 0; i < pairs; i++) {
+
+        // advance to the key whose range contains signature i, but never past the last key that was built
+        while (pk + 1 < key_count && pks[pk] < i)
+            pk++;
+
+        if ( gcry_pk_verify(s_sexps[i], m_sexps[i], key_sexps[pk]) == 0 )
+            result += 1;
+
+    }
+
+    clock_gettime(CLOCK_REALTIME, &t2);
+
+    // when t2's nanosecond field is behind t1's, borrow one full second (1000000000 ns) from the seconds
+    printf("CPU (Reference) verification took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((1000000000 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000);
+
+    if (result == pairs) {
+        printf("VERIFICATION RESULT: %lu - OK\n\n",result);
+    } else {
+        printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result);
+    }
+
+    // the S-expressions are libgcrypt objects: freeing the pointer arrays alone would leak them
+    for (unsigned long i = 0; i < pairs; i++) {
+        gcry_sexp_release(m_sexps[i]);
+        gcry_sexp_release(s_sexps[i]);
+    }
+    for (unsigned long k = 0; k < key_count; k++)
+        gcry_sexp_release(key_sexps[k]);
+
+    free(b);
+    free(e);
+    free(m);
+    free(s);
+
+    free(b_off);
+    free(e_off);
+    free(m_off);
+    free(s_off);
+
+    free(pks);
+    free(m_sexps);
+    free(s_sexps);
+    free(key_sexps);
+
+    return result == pairs ? 1 : 0;
+}
diff --git a/source/gpuv-ref.h b/source/gpuv-ref.h
@@ -0,0 +1,16 @@
+//
+// gpuv-ref.h
+// lib-gpu-verify
+//
+// Created by Cedric Zwahlen on 06.12.2023.
+//
+
+
+#ifndef gpuv_ref_h
+#define gpuv_ref_h
+
+#include "util.h"
+
+int gpuv_test_ref(void);
+
+#endif /* gpuv_ref_h */
diff --git a/source/gpuv.c b/source/gpuv.c
@@ -0,0 +1,622 @@
+/*
+ * lib-gpu-verify
+ *
+ * This software contains code derived from or inspired by the BigDigit library,
+ * <http://www.di-mgt.com.au/bigdigits.html>
+ * which is distributed under the Mozilla Public License, version 2.0.
+ *
+ * The original code and modifications made to it are subject to the terms and
+ * conditions of the Mozilla Public License, version 2.0. A copy of the
+ * MPL license can be obtained at
+ * https://www.mozilla.org/en-US/MPL/2.0/.
+ *
+ * Changes and additions to the original code are as follows:
+ * - Copied some functions of the BigDigit library into this file, to convert strings read from files to BigDigit type numbers.
+ *
+ * Contributors:
+ * - Cedric Zwahlen cedric.zwahlen@bfh.ch
+ *
+ * Please note that this software is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Mozilla Public License, version 2.0, for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+#include "gpuv.h"
+
+#define BITS_PER_DIGIT 32
+
+#define MAX_ALLOC_SIZE 256
+
+#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8)
+
+typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half?
+
+size_t mpSizeof(const DIGIT_T a[], size_t ndigits)
+{   /* Returns the number of significant digits in a: index of the highest non-zero digit plus one, or 0 if all are zero */
+    size_t used;
+
+    for (used = ndigits; used > 0; used--)
+        if (a[used - 1] != 0)
+            break;
+    return used;
+}
+
+volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits)
+{ /* Sets a = 0 */
+ // Clears the ndigits digits of a; the volatile accesses and the read-back below exist so the stores are not optimised away.
+ /* Prevent optimiser ignoring this */
+ volatile DIGIT_T optdummy;
+ volatile DIGIT_T *p = a;
+ // clear from the top digit down to a[0]
+ while (ndigits--)
+ a[ndigits] = 0;
+ // read a[0] back through the volatile pointer and hand it to the caller
+ optdummy = *p;
+ return optdummy;
+}
+
+size_t uiceil(double x)
+/* Ceiling of x as an unsigned integer; negative inputs give 0 */
+{
+    size_t whole;
+
+    if (x < 0)
+        return 0;
+
+    /* truncate, then step up by one when a fractional part was dropped */
+    whole = (size_t)x;
+    return ((x - whole) > 0.0) ? whole + 1 : whole;
+}
+
+volatile uint8_t zeroise_bytes(volatile void *v, size_t n)
+{ /* Zeroise the n bytes at v and make sure the optimiser does not ignore this */
+ volatile uint8_t optdummy;
+ volatile uint8_t *b = (uint8_t*)v; // byte-wise view of the buffer
+ while(n--) // clear from the last byte down to b[0]
+ b[n] = 0;
+ optdummy = *b; // volatile read-back of b[0]; returning it keeps the stores observable
+ return optdummy;
+}
+
+size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes)
+/* Packs the octets c[0..nbytes), most significant octet first, into a[], least significant digit first.
+   At most ndigits digits are written; returns how many were filled (may be larger than mpSizeof).
+*/
+{
+    size_t filled = 0;
+    int pos = (int)nbytes - 1;  /* next octet to consume, starting with the least significant one */
+
+    mpSetZero(a, ndigits);
+
+    while (filled < ndigits && pos >= 0)
+    {
+        DIGIT_T digit = 0;
+        int shift = 0;
+        /* gather octets into one digit until it is full or the input runs out */
+        while (pos >= 0 && shift < BITS_PER_DIGIT)
+        {
+            digit |= ((DIGIT_T)c[pos--]) << shift;
+            shift += 8;
+        }
+        a[filled++] = digit;
+    }
+    return filled;
+}
+
+size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s)
+/* Convert a string in hexadecimal format to a big digit.
+   Return actual number of digits set (may be larger than mpSizeof).
+   Just ignores invalid characters in s.
+   Returns 0 and leaves a zeroed if s is empty or too long for the scratch buffer.
+*/
+{
+    uint8_t newdigits[MAX_ALLOC_SIZE*2];
+
+    size_t newlen;
+    size_t n;
+    unsigned long t;
+    size_t i, j;
+
+    mpSetZero(a, ndigits);
+
+    /* Create some temp storage for int values */
+    n = strlen(s);
+    if (0 == n) return 0;
+    newlen = uiceil(n * 0.5); /* log(16)/log(256)=0.5 */
+    /* newdigits has a fixed size: refuse input that would not fit instead of overrunning the stack */
+    if (newlen > sizeof(newdigits)) return 0;
+    memset(newdigits, 0, newlen);
+
+    /* Work through zero-terminated string */
+    for (i = 0; s[i]; i++)
+    {
+        t = s[i];
+        if ((t >= '0') && (t <= '9')) t = (t - '0');
+        else if ((t >= 'a') && (t <= 'f')) t = (t - 'a' + 10);
+        else if ((t >= 'A') && (t <= 'F')) t = (t - 'A' + 10);
+        else continue;
+        /* newdigits = newdigits * 16 + t, carrying from the last (least significant) byte upwards */
+        for (j = newlen; j > 0; j--)
+        {
+            t += (unsigned long)newdigits[j-1] << 4;
+            newdigits[j-1] = (unsigned char)(t & 0xFF);
+            t >>= 8;
+        }
+    }
+
+    /* Convert bytes to big digits */
+    n = mpConvFromOctets(a, ndigits, newdigits, newlen);
+    zeroise_bytes(newdigits, newlen); /* scrub the scratch copy; a plain memset here is a dead store the compiler may drop */
+
+    return n;
+}
+
+// MARK: OPENCL CODE
+
+void opencl_pairs_from_files(void *bases, unsigned long *b_len,
+                             void *exponents, unsigned long *e_len,
+                             void *moduli, unsigned long *m_len,
+                             void *signatures, unsigned long *s_len,
+                             unsigned long *pks,
+                             unsigned long *n
+                             ) {
+    /* Reads publickey.txt and msgsig.txt, converts every hex line to DIGIT_T numbers and packs them into the caller's buffers; *n receives the pair count. */
+    FILE *pk;
+    FILE *ms;
+
+    pk = fopen("lib-gpu-generate/publickey.txt", "r");
+    ms = fopen("lib-gpu-generate/msgsig.txt", "r");
+
+    if (pk == NULL || ms == NULL) {
+        printf("Auxiliary files not found.");
+        abort();
+    }
+
+    fseek (ms, 0, SEEK_END);
+    long ms_l = ftell(ms);
+    fseek (ms, 0, SEEK_SET);
+    char *ms_ptr = ms_l < 0 ? NULL : calloc((size_t)ms_l + 1, 1); // zeroed, +1 byte: strtok_r needs a NUL-terminated string
+    char *ms_ptr_rest = NULL; // strtok_r save pointer only; it must not own an allocation
+    if (ms_ptr == NULL) {
+        printf("Auxiliary files could not be loaded.");
+        abort();
+    }
+    fread (ms_ptr, 1, ms_l, ms); // a short read is harmless: the rest of the buffer is already zero
+    fclose (ms);
+
+    fseek (pk, 0, SEEK_END);
+    long pk_l = ftell(pk);
+    fseek (pk, 0, SEEK_SET);
+    char *pk_ptr = pk_l < 0 ? NULL : calloc((size_t)pk_l + 1, 1);
+    char *pk_ptr_rest = NULL;
+    if (pk_ptr == NULL) {
+        printf("Auxiliary files could not be loaded.");
+        abort();
+    }
+    fread (pk_ptr, 1, pk_l, pk);
+    fclose (pk);
+
+    int i = 0;
+    int j = 0;
+
+    DIGIT_T *bases_t = bases;
+    DIGIT_T *exponents_t = exponents;
+    DIGIT_T *moduli_t = moduli;
+    DIGIT_T *signatures_t = signatures;
+
+    int sz = 2048 / sizeof(DIGIT_T);
+
+    char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest);
+    char* signature = strtok_r(0, "\n", &ms_ptr_rest);
+    char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest);
+    char* exponent = strtok_r(0, "\n", &pk_ptr_rest);
+    char* offs = strtok_r(0, "\n", &pk_ptr_rest);
+
+    while (modulus != NULL && exponent != NULL && offs != NULL) {
+
+        pks[i] = strtoul(offs, NULL, 10); // pks is unsigned long; atoi would truncate through int
+
+        DIGIT_T exponent_dgt [sz*2];
+        DIGIT_T modulus_dgt [sz*2];
+
+        mpSetZero(exponent_dgt, sz*2);
+        mpSetZero(modulus_dgt, sz*2);
+
+        // pass the capacity of the scratch arrays, not the string length, so an over-long line cannot overrun them
+        mpConvFromHex(exponent_dgt, sz*2, exponent);
+        mpConvFromHex(modulus_dgt, sz*2, modulus);
+
+        unsigned long max_len = 64; // hardcoded for 2048 bit RSA
+
+        // e_len / m_len hold running end offsets in digits: entry i-1 is where entry i starts
+        unsigned long e_start = (i == 0 ? 0 : e_len[i - 1]);
+        unsigned long m_start = (i == 0 ? 0 : m_len[i - 1]);
+
+        e_len[i] = e_start + mpSizeof(exponent_dgt, sz*2);
+        m_len[i] = m_start + max_len;
+
+        memcpy(&moduli_t[m_start], modulus_dgt, ( m_len[i] - m_start ) * sizeof(DIGIT_T));
+        memcpy(&exponents_t[e_start], exponent_dgt, ( e_len[i] - e_start ) * sizeof(DIGIT_T));
+
+        modulus = strtok_r(0, "\n", &pk_ptr_rest);
+        exponent = strtok_r(0, "\n", &pk_ptr_rest);
+        offs = strtok_r(0, "\n", &pk_ptr_rest);
+
+        i++;
+    }
+
+    // msgsig.txt is consumed two lines at a time: message, then signature
+
+    while (message != NULL && signature != NULL) {
+
+        DIGIT_T base_dgt [sz*2]; // temp storage, large enough
+        DIGIT_T signature_dgt [sz*2];
+
+        mpSetZero(base_dgt, sz*2);
+        mpSetZero(signature_dgt, sz*2);
+
+        mpConvFromHex(base_dgt, sz*2, message);
+        mpConvFromHex(signature_dgt, sz*2, signature);
+
+        unsigned long max_len = 64; // the maximum of DIGIT_T types we need
+
+        unsigned long b_start = (j == 0 ? 0 : b_len[j - 1]);
+        unsigned long s_start = (j == 0 ? 0 : s_len[j - 1]);
+
+        b_len[j] = b_start + max_len;
+        s_len[j] = s_start + max_len;
+
+        memcpy(&bases_t[b_start], base_dgt, ( b_len[j] - b_start ) * sizeof(DIGIT_T));
+        memcpy(&signatures_t[s_start], signature_dgt, ( s_len[j] - s_start ) * sizeof(DIGIT_T));
+
+        message = strtok_r(0, "\n",&ms_ptr_rest);
+        signature = strtok_r(0, "\n",&ms_ptr_rest);
+
+        j++;
+
+    }
+    free(ms_ptr); // every token has been converted, the file images are no longer needed
+    free(pk_ptr);
+    *n = j;
+}
+
+int opencl_prepare(struct gpu_info *info, struct gpu_state *state,
+                   void *bases, unsigned long *b_len,
+                   void *exponents, unsigned long *e_len,
+                   void *moduli, unsigned long *m_len,
+                   void *signatures, unsigned long *s_len,
+                   const unsigned long *pks,
+                   const unsigned long n) {
+    /* Creates the device buffers for one batch of n signatures, uploads the host data and binds the kernel arguments. Exits on any OpenCL failure; returns 0. */
+    int err; // error code returned from api calls
+
+    unsigned long pk = 0;
+
+    // Find the last key this batch needs: the first k with pks[k] + 1 >= n.
+    // The scan stops after n keys at the latest, so a malformed pks list cannot run off the array.
+    while (pk + 1 < n && pks[pk] + 1 < n)
+        pk++;
+
+
+    // the *_len arrays hold running end offsets, so the entry of the last element is the total length in digits
+    state->mod_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * m_len[pk], NULL, NULL);
+    state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * e_len[pk], NULL, NULL);
+
+    state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * s_len[n-1], NULL, NULL);
+    state->comp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(DIGIT_T) * b_len[n-1], NULL, NULL); // the base, to compare whether we get the same signature
+
+    state->mod_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
+    state->exp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
+
+    state->sig_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
+    state->comp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
+
+    state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL);
+
+
+    // every buffer is written to or bound below, so every one of them has to exist
+    if (!state->sig_mem || !state->exp_mem || !state->mod_mem || !state->comp_mem || !state->invalid ||
+        !state->sig_len || !state->exp_len || !state->mod_len || !state->comp_len || !state->pks_indices)
+    {
+        printf("Error: Failed to allocate device memory!\n");
+        exit(1);
+    }
+
+    // Write our data set into the input array in device memory
+    //
+    err = clEnqueueWriteBuffer(info->commands, state->sig_mem, CL_TRUE, 0, sizeof(DIGIT_T) * s_len[n-1], signatures, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0,sizeof(DIGIT_T) * e_len[pk], exponents, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->exp_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), e_len, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->mod_mem, CL_TRUE, 0, sizeof(DIGIT_T) * m_len[pk], moduli, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->mod_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), m_len, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->comp_mem, CL_TRUE, 0, sizeof(DIGIT_T) * b_len[n-1], bases, 0, NULL, NULL);
+    err |= clEnqueueWriteBuffer(info->commands, state->comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL);
+
+    err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+    }
+
+    // Set the arguments to our compute kernel
+    // (argument 8, the failure counter state->invalid, is bound once in gpuv_init)
+    err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->sig_mem);
+    err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->sig_len);
+    err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->exp_mem);
+    err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->exp_len);
+    err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->mod_mem);
+    err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->mod_len);
+    err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->comp_mem);
+    err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->comp_len);
+    err |= clSetKernelArg(info->kernel, 9, sizeof(cl_mem), &state->pks_indices);
+    err |= clSetKernelArg(info->kernel, 10, sizeof(unsigned long), &n);
+
+    if (err != CL_SUCCESS)
+    {
+        printf("RSA-Error: Failed to set kernel arguments! %d\n", err);
+        exit(1);
+    }
+
+    state->total = n; // opencl_exec_kernel uses this as the global work size
+
+
+    return 0;
+
+}
+
+int opencl_exec_kernel(struct gpu_info *info, struct gpu_state *state) {
+    /* Enqueues the kernel with one work item per signature of the prepared batch; returns 0, or EXIT_FAILURE if the enqueue is rejected. */
+    const size_t work_items = state->total;
+    int status;
+
+    // the stopwatch starts with the first launch only; later batches keep the original t1
+    if (state->skip) {
+        state->skip = false;
+        clock_gettime(CLOCK_REALTIME, &state->t1);
+    }
+
+    status = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &work_items, NULL, 0, NULL, NULL);
+
+    if (status == 0)
+    {
+        return 0;
+    }
+
+    // any non-zero status is reported, not fatal: the caller decides what to do with it
+    printf("Error: Failed to execute kernel!\n");
+    return EXIT_FAILURE;
+
+}
+
+
+unsigned long opencl_results(struct gpu_info *info, struct gpu_state *state, bool timed) {
+    /* Waits for the queue to drain, reads the failure counter back and returns state->total minus that count; prints the elapsed time when timed is set. */
+    if (state->skip) {
+        // skip is only cleared by the first kernel launch, so there is nothing to collect yet
+        return 0;
+    }
+
+    int err;
+
+    unsigned long failed_signatures = 0;
+
+    // Wait for the queued commands to get serviced before reading back results
+    //
+    err = clFinish(info->commands);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Kernel failure! %d\n", err);
+        exit(1);
+    }
+
+    // Read back the results from the device to verify the output
+    //
+    err = clEnqueueReadBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to read output array! %d\n", err);
+        exit(1);
+    }
+
+
+    if (timed) {
+        // stop measuring after the last command has been read
+        clock_gettime(CLOCK_REALTIME, &state->t2);
+
+        // when t2's nanosecond field is behind t1's, borrow one full second (1000000000 ns) from the seconds
+        printf("GPU verification took %ld.%06ld s\n", ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ), ( state->t2.tv_nsec < state->t1.tv_nsec ? ((1000000000 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000);
+    }
+
+
+    // everything the kernel did not count as invalid is reported as verified
+    return state->total - failed_signatures;
+
+}
+
+void opencl_cleanup(struct gpu_info *info) {
+ // Drops the references to the program, kernel, command queue and context stored in info.
+ clReleaseProgram(info->program);
+ clReleaseKernel(info->kernel);
+ clReleaseCommandQueue(info->commands);
+ clReleaseContext(info->context);
+ // NOTE(review): assuming these were the only references, info cannot be used for OpenCL calls afterwards.
+}
+
+void opencl_release(struct gpu_state *state) {
+ // Releases the per-batch buffers created by opencl_prepare: first the four number buffers,
+ clReleaseMemObject(state->comp_mem);
+ clReleaseMemObject(state->exp_mem);
+ clReleaseMemObject(state->mod_mem);
+ clReleaseMemObject(state->sig_mem);
+ // then the four offset tables,
+ clReleaseMemObject(state->comp_len);
+ clReleaseMemObject(state->exp_len);
+ clReleaseMemObject(state->mod_len);
+ clReleaseMemObject(state->sig_len);
+ // and finally the key index list.
+ clReleaseMemObject(state->pks_indices);
+ // state->invalid is not touched here; gpuv_finish releases it.
+
+
+}
+
+// MARK: for library
+
+void gpuv_init(struct gpu_info *info, struct gpu_state *state) {
+    /* Selects platform and device, builds kernel "several" from gpuv.cl, resets the host-side state and creates the zeroed device-side failure counter (kernel argument 8). */
+    info->platform = select_platform(0, false);
+    info->device_id = select_device (info->platform);
+    info->context = create_compute_context (info->device_id);
+    info->commands = create_command_queue (info->device_id, info->context);
+    info->program = compile_program (info->device_id, info->context, "gpuv.cl");
+    info->kernel = create_kernel (info->program, "several");
+
+    state->result = 0;
+    state->total = 0;
+    state->skip = true;
+
+    int err = 0;
+    // must be as wide as the device buffer: uploading sizeof(unsigned long) bytes from an int read past the variable
+    unsigned long failed_signatures = 0;
+
+    state->invalid = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL);
+    if (!state->invalid) {
+        printf("Error: Failed to allocate device memory!\n");
+        exit(1);
+    }
+
+    err |= clEnqueueWriteBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+    }
+    err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->invalid);
+    if (err != CL_SUCCESS) {
+        printf("Error: Failed to set kernel arguments! %d\n", err);
+        exit(1);
+    }
+}
+
+void gpuv_execute(struct gpu_info *info,
+ struct gpu_state *state,
+ void *bases, unsigned long *b_len,
+ void *exponents, unsigned long *e_len,
+ void *moduli, unsigned long *m_len,
+ void *signatures, unsigned long *s_len,
+ const unsigned long *pks,
+ const unsigned long n) {
+ // Pipelines one batch: upload it, collect the previous batch's result, launch the kernel, then drop our buffer references.
+ opencl_prepare(info, state, bases, b_len,
+ exponents, e_len,
+ moduli, m_len,
+ signatures, s_len,
+ pks, n); // prepares the next batch of signatures on CPU, naturally blocks until it's finished
+ state->result += opencl_results(info, state, false); // waits for kernel, if it is not ready yet
+ opencl_exec_kernel(info,state); // start kernel (returns immediately)
+ opencl_release(state); // release buffers
+ // NOTE(review): opencl_prepare has already set state->total to this batch's n when opencl_results runs, so the
+ // previous batch's result is computed from the new n -- harmless only if all batches have the same size. Confirm.
+
+}
+
+unsigned long gpuv_finish(struct gpu_info *info, struct gpu_state *state) {
+    /* Collects the result of the last batch, tears down the OpenCL objects and returns the accumulated result, resetting it to 0. */
+    unsigned long verified;
+
+    // timed = true: this final collection also prints the elapsed time
+    state->result += opencl_results(info,state,true);
+
+    opencl_cleanup(info);
+    clReleaseMemObject(state->invalid);
+
+    verified = state->result;
+    state->result = 0; // reset result
+    return verified;
+}
+
+// MARK: function to know how much storage the gpu has to split data
+
+
+
+//size_t retSize_3 = sizeof(cl_ulong);
+//cl_ulong max_stor = 0;
+//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, 0, NULL, &retSize_3);
+//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, retSize_3, &max_stor, &retSize_3);
+
+//printf("max memory: %llu\n", max_stor);
+
+
+int gpuv_test(void) {
+    /* Self-test: loads the sample keys and signatures, verifies them on the GPU as a single batch and prints the outcome. Always returns 0. */
+    unsigned long pairs = gpuv_estimate_pairs(); // returns an estimation of pairs
+
+    const unsigned long digit_sz = 64 * pairs * sizeof(DIGIT_T);
+    const unsigned long arr_sz = pairs * sizeof(unsigned long);
+
+    // number buffers, 64 digits reserved per value
+    DIGIT_T *bases = malloc(digit_sz);
+    DIGIT_T *exponents = malloc(digit_sz);
+    DIGIT_T *moduli = malloc(digit_sz);
+    DIGIT_T *signatures = malloc(digit_sz);
+
+    // running end offsets into the buffers above, plus the key index list
+    unsigned long *b_len = malloc(arr_sz);
+    unsigned long *e_len = malloc(arr_sz);
+    unsigned long *m_len = malloc(arr_sz);
+    unsigned long *s_len = malloc(arr_sz);
+    unsigned long *pks = malloc(arr_sz);
+
+    memset(bases, 0, digit_sz);
+    memset(exponents, 0, digit_sz);
+    memset(moduli, 0, digit_sz);
+    memset(signatures, 0, digit_sz);
+
+    memset(b_len, 0, arr_sz);
+    memset(e_len, 0, arr_sz);
+    memset(m_len, 0, arr_sz);
+    memset(s_len, 0, arr_sz);
+    memset(pks, 0, arr_sz);
+
+    opencl_pairs_from_files(bases, b_len,
+                            exponents, e_len,
+                            moduli, m_len,
+                            signatures, s_len, pks, &pairs); // this returns the actual amount of pairs
+
+    printf("VERIFYING %lu SIGNATURES...\n", pairs);
+
+    struct gpu_info info;
+    struct gpu_state state;
+
+    gpuv_init(&info, &state);
+
+    gpuv_execute(&info, &state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, pairs);
+
+    const unsigned long res = gpuv_finish(&info, &state);
+
+    if (res != pairs) {
+        printf("VERIFICATION RESULT: %lu - NOT OK\n\n",res);
+    } else {
+        printf("VERIFICATION RESULT: %lu - OK\n\n",res);
+    }
+
+    free(bases);
+    free(exponents);
+    free(moduli);
+    free(signatures);
+
+    free(b_len);
+    free(e_len);
+    free(m_len);
+    free(s_len);
+
+    free(pks);
+
+    return 0;
+}
+
diff --git a/source/gpuv.cl b/source/gpuv.cl
@@ -0,0 +1 @@
+../xcode/gpuv.cl
+\ No newline at end of file
diff --git a/source/gpuv.h b/source/gpuv.h
@@ -0,0 +1,30 @@
+//
+// gpuv.h
+// lib-gpu-verify
+//
+// Created by Cedric Zwahlen on 28.09.2023.
+//
+
+#ifndef gpuv_h
+#define gpuv_h
+
+#include "util.h"
+#include <stdint.h>
+
+int gpuv_test(void);
+// gpuv_test: self-test that loads the sample files, runs one GPU batch and prints the result; always returns 0.
+// gpuv_init: selects platform/device, builds kernel "several" from gpuv.cl and creates the device-side failure counter.
+void gpuv_init(struct gpu_info *info, struct gpu_state *state);
+// gpuv_execute: uploads one batch of n signatures, adds the previous batch's result to state->result and launches the kernel.
+void gpuv_execute(struct gpu_info *info,
+ struct gpu_state *state,
+ void *bases, unsigned long *b_len,
+ void *exponents, unsigned long *e_len,
+ void *moduli, unsigned long *m_len,
+ void *signatures, unsigned long *s_len,
+ const unsigned long *pks,
+ const unsigned long n);
+// gpuv_finish: collects the last batch, releases the OpenCL objects in info and returns the accumulated result.
+unsigned long gpuv_finish(struct gpu_info *info, struct gpu_state *state);
+
+#endif /* gpuv_h */
diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c
@@ -6,11 +6,10 @@
//
-#include "rsa-test.h"
+#include "gpuv.h"
-#include "reference-test.h"
-#include "montgomery-test.h"
-#include "montmodmult.h"
+#include "gpuv-ref.h"
+#include "gpuv-montg.h"
int main(int argc, char** argv)
@@ -18,13 +17,13 @@ int main(int argc, char** argv)
- setup_gcry();
+ gpuv_prepare_gcry();
- mont_modmult_tests();
+ gpuv_test_montg();
- rsa_tests();
+ gpuv_test();
- reference_tests();
+ gpuv_test_ref();
diff --git a/source/montgomery-test.c b/source/montgomery-test.c
@@ -1,375 +0,0 @@
-//
-// montgomery-test.c
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 28.11.2023.
-//
-
-#include "montgomery-test.h"
-
-#include <gmp.h> // has been adapted
-
-void mont_pairs_from_files(void *bases, unsigned long *b_off,
- void *exponents, unsigned long *e_off,
- void *moduli, unsigned long *m_off,
- void *signatures, unsigned long *s_off,
- unsigned long *pks,
- unsigned long *n) {
-
- char *bases_t = bases;
- char *exponents_t = exponents;
- char *moduli_t = moduli;
- char *signatures_t = signatures;
-
- FILE *pkfile;
- FILE *msfile;
-
- pkfile = fopen("lib-gpu-generate/publickey.txt", "r");
- msfile = fopen("lib-gpu-generate/msgsig.txt", "r");
-
- if (pkfile == NULL || msfile == NULL) {
- printf("Auxiliary files not found.");
- abort();
- }
-
-
- int i = 0;
-
- unsigned long b_offset = 0;
- unsigned long e_offset = 0;
- unsigned long m_offset = 0;
- unsigned long s_offset = 0;
-
- while (1) {
-
- char n_buf[2048]; // need to be 0
- char e_buf[2048];
-
- memset(n_buf, 0, 2048);
- memset(e_buf, 0, 2048);
-
- unsigned long lastIndex = 0;
-
- if (fscanf(pkfile, "%s %s %lu", n_buf,e_buf, &lastIndex) == -1)
- break;
-
- mpz_t n, e;
-
- mpz_init_set_str(n,n_buf,16);
- mpz_init_set_str(e,e_buf,16);
-
- memcpy(&moduli_t[m_offset], n, sizeof(mpz_t));
- memcpy(&exponents_t[e_offset], e, sizeof(mpz_t));
-
- m_off[i] = m_offset;
- e_off[i] = e_offset;
-
- m_offset += sizeof(mpz_t);
- e_offset += sizeof(mpz_t);
-
- pks[i] = lastIndex;
-
- i++;
-
- // break; // testing with just one
- }
-
- int j = 0;
-
- while (1) {
-
- char m_buf[2048]; // temp storage, large enough
- char s_buf[2048];
-
- memset(m_buf, 0, 2048);
- memset(s_buf, 0, 2048);
-
- if (fscanf(msfile, "%s %s", m_buf,s_buf) == -1)
- break;
-
- mpz_t m, s;
-
- mpz_init_set_str(m,m_buf,16);
- mpz_init_set_str(s,s_buf,16);
-
- memcpy(&bases_t[b_offset], m, sizeof(mpz_t));
- memcpy(&signatures_t[s_offset], s, sizeof(mpz_t));
-
- b_off[j] = b_offset;
- s_off[j] = s_offset;
-
- b_offset += sizeof(mpz_t);
- s_offset += sizeof(mpz_t);
-
- j++;
-
- // break; // testing with just one
-
- }
-
- fclose(pkfile);
- fclose(msfile);
-
- *n = j;
-}
-
-
-
-
-int mont_verify_pairs_with_opencl(void *bases, unsigned long *b_off,
- void *exponents, unsigned long *e_off,
- void *moduli, unsigned long *m_off,
- void *signatures, unsigned long *s_off,
- const unsigned long *pks,
- const unsigned long n,
- unsigned long *result) {
-
- int err; // error code returned from api calls
-
- size_t global; // global domain size for our calculation
- size_t local; // local domain size for our calculation
-
- // MARK: this part, I can cache for the library
-
- cl_platform_id platform = select_platform(0, false);
- cl_device_id device_id = select_device (platform);
- cl_context context = create_compute_context (device_id);
- cl_command_queue commands = create_command_queue (device_id, context);
- cl_program program = compile_program (device_id, context, "montgomery.cl");
- cl_kernel kernel = create_kernel (program, "montgomery");
-
- // Create the input and output arrays in device memory for our calculation
-
- cl_mem sig_mem;
- cl_mem exp_mem;
- cl_mem mod_mem;
- cl_mem comp_mem;
-
- cl_mem sig_len;
- cl_mem exp_len;
- cl_mem mod_len;
- cl_mem comp_len;
-
- cl_mem pks_indices;
-
- cl_mem valid; // needs to be a buffer because it goes out
-
- unsigned long signature_is_valid = 0;
-
- unsigned long pk = 0;
-
- while (1) {
- if (pks[pk] + 1 == n)
- break;
- pk++;
- }
-
- unsigned long len = sizeof(mpz_t);
-
-
- mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * (pk + 1), NULL, NULL);
- exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * (pk + 1), NULL, NULL);
-
- sig_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * n, NULL, NULL);
- comp_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, len * n, NULL, NULL); // the base, to compare whether we get the same signature
-
- mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
- exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
-
- sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
- comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
-
- pks_indices = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL);
- valid = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL);
-
-
-
- if (!sig_mem || !exp_mem || !mod_mem || !comp_mem || !valid )
- {
- printf("Error: Failed to allocate device memory!\n");
- exit(1);
- }
-
- // Write our data set into the input array in device memory
- //
- err = clEnqueueWriteBuffer(commands, sig_mem, CL_TRUE, 0, len * n, signatures, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_off, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, len * (pk + 1), exponents, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), e_off, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, len * (pk + 1), moduli, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), m_off, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(commands, comp_mem, CL_TRUE, 0, len * n, bases, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_off, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(commands, pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to write to source array!\n");
- exit(1);
- }
-
- // Set the arguments to our compute kernel
- //
- err = 0;
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &sig_mem);
- err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len);
- err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &exp_mem);
- err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len);
- err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &mod_mem);
- err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len);
- err |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &comp_mem);
- err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len);
- err |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &valid);
- err |= clSetKernelArg(kernel, 9, sizeof(cl_mem), &pks_indices);
- err |= clSetKernelArg(kernel, 10, sizeof(unsigned long), &n);
-
- //err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to set kernel arguments! %d\n", err);
- exit(1);
- }
-
- // Get the maximum work group size for executing the kernel on the device
- //
- err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to retrieve kernel work group info! %d\n", err);
- exit(1);
- }
-
- // Execute the kernel over the entire range of our 1d input data set
- // using the maximum number of work group items for this device
- //
-
- global = n;
- local = 1;
-
- err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
- if (err)
- {
- printf("Error: Failed to execute kernel!\n");
- return EXIT_FAILURE;
- }
-
- printf("KERNEL IS EXECUTING...\n");
-
- struct timespec t1, t2;
-
- clock_gettime(CLOCK_REALTIME, &t1);
-
-
- // Wait for the command commands to get serviced before reading back results
- //
- clFinish(commands);
-
- // Read back the results from the device to verify the output
- //
- //err = clEnqueueReadBuffer( commands, res_mem, CL_TRUE, 0, res_len, res_buf, 0, NULL, NULL );
- err = clEnqueueReadBuffer( commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to read output array! %d\n", err);
- exit(1);
- }
-
- clock_gettime(CLOCK_REALTIME, &t2);
-
- printf("GPU verification (Montgomery) took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((999999999 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000);
-
- *result = n - signature_is_valid;
-
- // Shutdown and cleanup
- //
- clReleaseMemObject(comp_mem);
- clReleaseMemObject(exp_mem);
- clReleaseMemObject(mod_mem);
- clReleaseMemObject(sig_mem);
-
- clReleaseMemObject(comp_len);
- clReleaseMemObject(exp_len);
- clReleaseMemObject(mod_len);
- clReleaseMemObject(sig_len);
-
- clReleaseProgram(program);
- clReleaseKernel(kernel);
- clReleaseCommandQueue(commands);
- clReleaseContext(context);
-
- return 0;
-
-}
-
-
-int mont_rsa_tests(void) {
-
- unsigned long pairs = number_of_pairs();
-
- long str_sz = sizeof(mpz_t) * pairs;
-
- void *b = malloc(str_sz);
- void *e = malloc(str_sz);
- void *m = malloc(str_sz);
- void *s = malloc(str_sz);
-
- unsigned long *b_off = malloc(pairs * sizeof(unsigned long));
- unsigned long *e_off = malloc(pairs * sizeof(unsigned long));
- unsigned long *m_off = malloc(pairs * sizeof(unsigned long));
- unsigned long *s_off = malloc(pairs * sizeof(unsigned long));
-
- memset(b, 0, str_sz);
- memset(e, 0, str_sz);
- memset(m, 0, str_sz);
- memset(s, 0, str_sz);
-
- memset(b_off, 0, pairs * sizeof(unsigned long));
- memset(e_off, 0, pairs * sizeof(unsigned long));
- memset(m_off, 0, pairs * sizeof(unsigned long));
- memset(s_off, 0, pairs * sizeof(unsigned long));
-
- unsigned long *pks = malloc(pairs * sizeof(unsigned long));
-
- memset(pks, 0, pairs * sizeof(unsigned long));
-
-
-
- mont_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks,
- &pairs);
-
-
- printf("VERIFYING %lu SIGNATURES...\n", pairs);
-
- unsigned long result = 0;
-
-
- mont_verify_pairs_with_opencl(s, s_off, e, e_off, m, m_off, b, b_off,
- pks, pairs, &result);
-
-
-
-
- if (result == pairs) {
- printf("VERIFICATION RESULT: %lu - OK\n\n",result);
- } else {
- printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result);
- }
-
-
- free(b);
- free(e);
- free(m);
- free(s);
-
- free(b_off);
- free(e_off);
- free(m_off);
- free(s_off);
-
- // return 1 for success, 0 for failure
- return result == pairs ? 1 : 0;
-}
diff --git a/source/montgomery-test.h b/source/montgomery-test.h
@@ -1,15 +0,0 @@
-//
-// montgomery-test.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 28.11.2023.
-//
-
-#ifndef montgomery_test_h
-#define montgomery_test_h
-
-#include "util.h"
-
-int mont_rsa_tests(void);
-
-#endif /* montgomery_test_h */
diff --git a/source/montgomery.c b/source/montgomery.c
@@ -1,431 +0,0 @@
-//
-// montgomery.c
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 14.11.2023.
-//
-
-#include "montgomery.h"
-#include <math.h>
-
-
-
-#define BITS_PER_DIGIT (sizeof(gpu_register) * 8)
-#define HIBITMASK 0x8000000000000000
-#define MAX_DIGIT 0xFFFFFFFFFFFFFFFF
-
-#define R 32
-
-int mult(gpu_register p[2], gpu_register x, gpu_register y)
-{
-
- /* Use a 64-bit temp for product */
- //ulong t = (ulong)x * (ulong)y;
- /* then split into two parts */
-
- __int128_t t = (__int128_t)x * (__int128_t)y;
-
- p[1] = (gpu_register)(t >> BITS_PER_DIGIT);
- p[0] = (gpu_register)t;
-
- return 0;
-}
-
-
-int multiply( gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits)
-{
- /* Computes product w = u * v
- where u, v are multiprecision integers of ndigits each
- and w is a multiprecision integer of 2*ndigits
-
- Ref: Knuth Vol 2 Ch 4.3.1 p 268 Algorithm M.
- */
-
- gpu_register k, t[2];
- size_t i, j, m, n;
-
- //assert(w != u && w != v);
-
- m = n = ndigits;
-
- /* Step M1. Initialise */
- for (i = 0; i < 2 * m; i++)
- w[i] = 0;
-
- for (j = 0; j < n; j++)
- {
- /* Step M2. Zero multiplier? */
- if (v[j] == 0)
- {
- w[j + m] = 0;
- }
- else
- {
- /* Step M3. Initialise i */
- k = 0;
- for (i = 0; i < m; i++)
- {
- /* Step M4. Multiply and add */
- /* t = u_i * v_j + w_(i+j) + k */
- mult(t, u[i], v[j]);
-
- t[0] += k;
- if (t[0] < k)
- t[1]++;
- t[0] += w[i+j];
- if (t[0] < w[i+j])
- t[1]++;
-
- w[i+j] = t[0];
- k = t[1];
- }
- /* Step M5. Loop on i, set w_(j+m) = k */
- w[j+m] = k;
- }
- } /* Step M6. Loop on j */
-
- return 0;
-}
-
-
-int square( gpu_register *w, gpu_register *x, size_t ndigits)
-/* New in Version 2.0 */
-{
- /* Computes square w = x * x
- where x is a multiprecision integer of ndigits
- and w is a multiprecision integer of 2*ndigits
-
- Ref: Menezes p596 Algorithm 14.16 with errata.
- */
-
- gpu_register k, p[2], u[2], cbit, carry;
- size_t i, j, t, i2, cpos;
-
- t = ndigits;
-
- /* 1. For i from 0 to (2t-1) do: w_i = 0 */
- i2 = t << 1;
- for (i = 0; i < i2; i++)
- w[i] = 0;
-
- carry = 0;
- cpos = i2-1;
- /* 2. For i from 0 to (t-1) do: */
- for (i = 0; i < t; i++)
- {
- /* 2.1 (uv) = w_2i + x_i * x_i, w_2i = v, c = u
- Careful, w_2i may be double-prec
- */
- i2 = i << 1; /* 2*i */
- mult(p, x[i], x[i]);
- p[0] += w[i2];
- if (p[0] < w[i2])
- p[1]++;
- k = 0; /* p[1] < b, so no overflow here */
- if (i2 == cpos && carry)
- {
- p[1] += carry;
- if (p[1] < carry)
- k++;
- carry = 0;
- }
- w[i2] = p[0];
- u[0] = p[1];
- u[1] = k;
-
- /* 2.2 for j from (i+1) to (t-1) do:
- (uv) = w_{i+j} + 2x_j * x_i + c,
- w_{i+j} = v, c = u,
- u is double-prec
- w_{i+j} is dbl if [i+j] == cpos
- */
- k = 0;
- for (j = i+1; j < t; j++)
- {
- /* p = x_j * x_i */
- mult(p, x[j], x[i]);
- /* p = 2p <=> p <<= 1 */
- cbit = (p[0] & HIBITMASK) != 0;
- k = (p[1] & HIBITMASK) != 0;
- p[0] <<= 1;
- p[1] <<= 1;
- p[1] |= cbit;
- /* p = p + c */
- p[0] += u[0];
- if (p[0] < u[0])
- {
- p[1]++;
- if (p[1] == 0)
- k++;
- }
- p[1] += u[1];
- if (p[1] < u[1])
- k++;
- /* p = p + w_{i+j} */
- p[0] += w[i+j];
- if (p[0] < w[i+j])
- {
- p[1]++;
- if (p[1] == 0)
- k++;
- }
- if ((i+j) == cpos && carry)
- { /* catch overflow from last round */
- p[1] += carry;
- if (p[1] < carry)
- k++;
- carry = 0;
- }
- /* w_{i+j} = v, c = u */
- w[i+j] = p[0];
- u[0] = p[1];
- u[1] = k;
- }
- /* 2.3 w_{i+t} = u */
- w[i+t] = u[0];
- /* remember overflow in w_{i+t} */
- carry = u[1];
- cpos = i+t;
- }
-
- /* (NB original step 3 deleted in Menezes errata) */
-
- /* Return w */
-
- return 0;
-}
-
-gpu_register add( gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits)
-{
- /* Calculates w = u + v
- where w, u, v are multiprecision integers of ndigits each
- Returns carry if overflow. Carry = 0 or 1.
-
- Ref: Knuth Vol 2 Ch 4.3.1 p 266 Algorithm A.
- */
-
- gpu_register k;
- size_t j;
-
- //assert(w != v);
-
- /* Step A1. Initialise */
- k = 0;
-
- for (j = 0; j < ndigits; j++)
- {
- /* Step A2. Add digits w_j = (u_j + v_j + k)
- Set k = 1 if carry (overflow) occurs
- */
- w[j] = u[j] + k;
- if (w[j] < k)
- k = 1;
- else
- k = 0;
-
- w[j] += v[j];
- if (w[j] < v[j])
- k++;
-
- } /* Step A3. Loop on j */
-
- return k; /* w_n = k */
-}
-
-gpu_register subtract(gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits)
-{
- /* Calculates w = u - v where u >= v
- w, u, v are multiprecision integers of ndigits each
- Returns 0 if OK, or 1 if v > u.
-
- Ref: Knuth Vol 2 Ch 4.3.1 p 267 Algorithm S.
- */
-
- gpu_register k;
- size_t j;
-
-
-
- /* Step S1. Initialise */
- k = 0;
-
- for (j = 0; j < ndigits; j++)
- {
- /* Step S2. Subtract digits w_j = (u_j - v_j - k)
- Set k = 1 if borrow occurs.
- */
- w[j] = u[j] - k;
- if (w[j] > MAX_DIGIT - k)
- k = 1;
- else
- k = 0;
-
- w[j] -= v[j];
- if (w[j] > MAX_DIGIT - v[j])
- k++;
-
- } /* Step S3. Loop on j */
-
- return k; /* Should be zero if u >= v */
-}
-
-void equal( gpu_register *a, gpu_register *b, size_t ndigits)
-{ /* Sets a = b */
- size_t i;
-
- for (i = 0; i < ndigits; i++)
- {
- a[i] = b[i];
- }
-}
-
-
-void erase_all( gpu_register *a, size_t n)
-{
-
- for (int i = 0; i < n; i++)
- {
- a[i] = 0;
- }
-
-}
-
-void shift_right(gpu_register *r, int n) {
-
- for (int i = 0; i < R+1; i++) {
-
- r[i] = r[i + n];
- //r[i + n] = 0;
-
- }
-
-}
-
-// 1 if r > l ; -1 if r < l; == 0
-int compare(gpu_register *r, gpu_register *l, int n) {
-
- int x = 0;
- for (int i = n - 1; i >= 0; i--) {
- x = r[i] > l[i];
- if (x) return 1;
- x = r[i] < l[i];
- if (x) return -1;
- }
- return 0;
-}
-
-int testbit(gpu_register e, int i) {
-
- return (e & (0x1 << (gpu_register)i) ) > 0 ? 1 : 0;
-
-}
-
-void montMul( gpu_register *ret,
- gpu_register *a, gpu_register *b,
- gpu_register *ni, gpu_register *n,
- gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3
- ) {
-
- multiply(tmp_1,a,b,R);
- multiply(tmp_2,tmp_1,ni,R);
- multiply(tmp_3,tmp_2,n,R);
-
- add(tmp_2,tmp_1,tmp_3,R*2+1); // MARK: something gets lost in the carry
-
- shift_right(tmp_2, R);
-
- erase_all(tmp_3, R*2);
- equal(tmp_3, n, R);
-
- if (compare(tmp_2, tmp_3, R+1) >= 0) {
- subtract(ret, tmp_2, tmp_3, R+1);
- } else {
- equal(ret, tmp_2, R);
- }
-
-}
-
-void montSqr( gpu_register *ret,
- gpu_register *a,
- gpu_register *ni, gpu_register *n,
- gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3
- ) {
-
- square(tmp_1,a,R);
- multiply(tmp_2,tmp_1,ni,R);
- multiply(tmp_3,tmp_2,n,R);
-
- add(tmp_2,tmp_1,tmp_3,R*2+1);
-
- shift_right(tmp_2, R);
-
- erase_all(tmp_3, R*2);
- equal(tmp_3, n, R);
-
- if (compare(tmp_2, tmp_3, R+1) >= 0) {
- subtract(ret, tmp_2, tmp_3, R+1);
- } else {
- equal(ret, tmp_2, R);
- }
-
-
-
-}
-
-
- void mont( gpu_register *x, gpu_register *m,
- gpu_register *res, gpu_register *n, // res is not needed, we write the result in x
- gpu_register *ni, gpu_register *exp,
- gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3,
- gpu_register *pks//, unsigned long count // invalid is not needed either
- )
-{
-
- size_t i = 0;
-
- int offs = 0; // the size of one number
-
- int pk = 0; // das funktioniert so nicht – die globale id wird grösser, ohne dass noch weitere elemente in pks sind...
-
-
-// while (1) {
-// if (pks[pk] >= i)
-// break;
-// pk++;
-// }
-
-
- int k = ceil(log2((float)exp[pk] + (float)1));
-
-
-
- for (int j = k - 1; j >= 0; j--) {
-
- montSqr(res, x, ni, n, tmp_1, tmp_2, tmp_3);
-
- if (testbit(exp[pk], j)) {
-
- equal(x, res, R);
- erase_all(tmp_1,R * 2);
- erase_all(tmp_2,R * 2);
- erase_all(tmp_3,R * 2);
-
- montMul(res, x, m, ni, n, tmp_1, tmp_2, tmp_3);
-
-
- }
-
- equal(x, res, R);
- erase_all(tmp_1,R * 2);
- erase_all(tmp_2,R * 2);
- erase_all(tmp_3,R * 2);
- // clear / reset the temps...
- }
-
- equal(x, res, R);
- erase_all(m,R);
- m[0] = 1; // reuse m, to convert out of montgomery
- montMul(res, x, m, ni, n, tmp_1, tmp_2, tmp_3);
-
-
-}
diff --git a/source/montgomery.cl b/source/montgomery.cl
@@ -1 +0,0 @@
-../xcode/montgomery.cl
-\ No newline at end of file
diff --git a/source/montgomery.h b/source/montgomery.h
@@ -1,27 +0,0 @@
-//
-// montgomery.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 14.11.2023.
-//
-
-#ifndef montgomery_h
-#define montgomery_h
-
-#include <stdio.h>
-
-#include <gmp.h>
-
-#include <assert.h>
-
-typedef u_int64_t gpu_register;
-
-void mont( gpu_register *x, gpu_register *m,
- gpu_register *res, gpu_register *n, // res is not needed, we write the result in x
- gpu_register *ni, gpu_register *exp,
- gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3,
- gpu_register *pks//, unsigned long count // invalid is not needed either
-);
-
-
-#endif /* montgomery_h */
diff --git a/source/montmodmult.c b/source/montmodmult.c
@@ -1,662 +0,0 @@
-//
-// montmodmult.c
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 16.12.2023.
-//
-
-#include "montmodmult.h"
-#include "util.h"
-#include <pthread.h>
-
-static unsigned long len_in_bytes = 0;
-
-#define ORDER -1 // I think we need to do this, because we want to write it in the 'wrong' way
-#define END 0
-
-#define BIT_LENGTH (2048)
-
-#define BITS 64
-
-// sizes are always the same 32 units for all, except exp_buf
-void montmodmult_pairs_from_files(void *x_buf, void *m_buf,
- void *r_1_buf,
- void *n_buf, void *ni_buf,
- void *msg_buf,
- void *exp_buf,
- //void *mod_buf,
- void *s_buf,
- unsigned long *pks, unsigned long *n) {
-
- FILE * pk;
- FILE * ms;
-
- pk = fopen("lib-gpu-generate/publickey.txt", "r");
- ms = fopen("lib-gpu-generate/msgsig.txt", "r");
-
- if (pk == NULL || ms == NULL) {
- printf("Auxiliary files not found.");
- abort();
- }
-
- fseek (ms, 0, SEEK_END);
- long ms_l = ftell(ms);
- fseek (ms, 0, SEEK_SET);
- char *ms_ptr = malloc(ms_l);
- char *ms_ptr_rest = malloc(ms_l);
- if (ms_ptr || ms_ptr_rest)
- {
- fread (ms_ptr, 1, ms_l, ms);
- memcpy(ms_ptr_rest, ms_ptr, ms_l);
- }
- fclose (ms);
-
- fseek (pk, 0, SEEK_END);
- long pk_l = ftell(pk);
- fseek (pk, 0, SEEK_SET);
- char *pk_ptr = malloc(pk_l);
- char *pk_ptr_rest = malloc(pk_l);
- if (pk_ptr && pk_ptr_rest)
- {
- fread (pk_ptr, 1, pk_l, pk);
- memcpy(pk_ptr_rest, pk_ptr, pk_l);
- }
- fclose (pk);
-
- gpu_register *n_buf_t = n_buf;
- gpu_register *msg_buf_t = msg_buf;
- gpu_register *s_buf_t = s_buf;
- gpu_register *exp_buf_t = exp_buf;
-
- int len = (BIT_LENGTH / 8) / sizeof(gpu_register);
-
- char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest);
- char* signature = strtok_r(0, "\n", &ms_ptr_rest);
- char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest);
- char* exponent = strtok_r(0, "\n", &pk_ptr_rest);
- char* offs = strtok_r(0, "\n", &pk_ptr_rest);
-
- int i = 0;
- int j = 0;
-
- mpz_t e,mod,msg,s;
-
- mpz_init(e);
- mpz_init(mod);
- mpz_init(msg);
- mpz_init(s);
-
- while (message != NULL && signature != NULL) {
-
- if (i == 0 || pks[j - 1] < i) {
-
- mpz_set_str(mod,modulus,16);
- mpz_set_str(e,exponent,16);
-
- pks[j] = atoi(offs);
-
- modulus = strtok_r(0, "\n", &pk_ptr_rest);
- exponent = strtok_r(0, "\n", &pk_ptr_rest);
- offs = strtok_r(0, "\n", &pk_ptr_rest);
-
- mpz_export(&n_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, mod);
- mpz_export(&exp_buf_t[j], NULL, ORDER, sizeof(gpu_register), END, 0, e);
-
-
- j++;
-
- }
-
- mpz_set_str(msg,message,16);
- mpz_set_str(s,signature,16);
-
- message = strtok_r(0, "\n",&ms_ptr_rest);
- signature = strtok_r(0, "\n",&ms_ptr_rest);
-
- mpz_export(&msg_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, msg);
- mpz_export(&s_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, s);
-
- i++;
- }
-
- mpz_clear(e);
- mpz_clear(mod);
- mpz_clear(msg);
- mpz_clear(s);
-
-
-
- *n = i;
-
-}
-
-struct thread_args {
-
- void *x_buf;
- void *m_buf;
- void *r_1_buf;
- void *n_buf;
- void *ni_buf;
- void *s_buf;
- unsigned long *pks;
- unsigned long n_start;
- unsigned long n_end;
-};
-
-void *convert_thread(void * vargp) {
-
- struct thread_args *args = vargp;
-
- int len = (BIT_LENGTH / 8) / sizeof(gpu_register);
-
-
- int j = 0;
-
- while(1) {
- if (args->pks[j] > args->n_start)
- break;
- j++;
- }
-
-
- //printf("pks[%i] == %i, start at: %i, end at: %i\n",j,args->pks[j], args->n_start, args->n_end);
-
- gpu_register *s_buf_t = args->s_buf;
- gpu_register *n_buf_t = args->n_buf;
-
- gpu_register *x_buf_t = args->x_buf;
- gpu_register *m_buf_t = args->m_buf;
- gpu_register *r_1_buf_t = args->r_1_buf;
- gpu_register *ni_buf_t = args->ni_buf;
-
- mpz_t mod, s;
- mpz_init(s);
- mpz_init(mod);
-
- mpz_t r, r_1, ni, M, x;
-
- mpz_init(r);
- mpz_init(r_1);
- mpz_init(ni);
- mpz_init(M);
- mpz_init(x);
-
- mpz_t one; // helper variable
- mpz_init_set_si(one,1);
-
- mpz_set_si(one, 1);
- mpz_mul_2exp(r,one,BIT_LENGTH); // r
-
- int start = (int)args->n_start;
-
-
-
- for(int i = start; i < args->n_end; i++) {
-
- if (i == start || args->pks[j - 1] < i) {
-
- mpz_import(mod, len, ORDER, sizeof(gpu_register), END, 0, &n_buf_t[len * j]);
-
- mpz_gcdext(one, r_1, ni, r, mod); // set r_1 and ni
-
- int sgn = mpz_sgn(r_1);
-
- mpz_abs(r_1, r_1);
- mpz_abs(ni, ni);
-
- if (sgn == -1) {
- mpz_sub(ni, r, ni);
- mpz_sub(r_1, mod, r_1);
- }
-
-
- mpz_export(&ni_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, ni);
- mpz_export(&r_1_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, r_1);
-
-
-
- j++;
- }
-
- mpz_import(s, len, ORDER, sizeof(gpu_register), END, 0, &s_buf_t[len * i]);
-
- // set x (the number to 'square' (multiply by itself))
- mpz_mul(M, s, r);
- mpz_mod(M, M, mod);
-
- mpz_mod(x, r, mod);
-
-
- mpz_export(&x_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, x);
- mpz_export(&m_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, M);
-
- }
-
- mpz_clear(r);
- mpz_clear(r_1);
- mpz_clear(ni);
- mpz_clear(M);
- mpz_clear(x);
-
- mpz_clear(one);
-
- return NULL;
-
-}
-
-void modmult_opencl_convert(struct gpu_state_alt *state, void *x_buf, void *m_buf,
- void *r_1_buf,
- void *n_buf, void *ni_buf,
- void *msg_buf,
- void *exp_buf,
- //void *mod_buf,
- void *s_buf,
- unsigned long *pks, unsigned long n) {
-
- long nr_of_threads = 4;
-
- #if __APPLE__ || unix
-
- nr_of_threads = sysconf(_SC_NPROCESSORS_ONLN);
-
- #elif _WIN32
-
- SYSTEM_INFO sysinfo;
- GetSystemInfo(&sysinfo);
- nr_of_threads = sysinfo.dwNumberOfProcessors;
-
- #endif
-
-
- struct timespec p1, p2;
-
- clock_gettime(CLOCK_REALTIME, &p1);
-
- pthread_t tid[nr_of_threads];
- // not the best, but it is safe
- int ids[nr_of_threads];
-
- struct thread_args args[nr_of_threads];
-
- unsigned long range = n / nr_of_threads;
-
- for (int i = 0; i < nr_of_threads - 1; i++)
- {
- args[i].n_start = i * range;
- args[i].n_end = (i + 1) * range;
- }
-
- // last one takes the 'rest'
- args[nr_of_threads - 1].n_start = (nr_of_threads - 1) * range;
- args[nr_of_threads - 1].n_end = n;
-
-
- for (int i = 0; i < nr_of_threads; i++) {
-
- args[i].m_buf = m_buf;
- args[i].n_buf = n_buf;
- args[i].ni_buf = ni_buf;
- args[i].pks = pks;
- args[i].r_1_buf = r_1_buf;
- args[i].s_buf = s_buf;
- args[i].x_buf = x_buf;
-
- ids[i] = i;
- int err = pthread_create(&tid[i], NULL, convert_thread, (void *)&args[i]);
- if ( err != 0 )
- printf("Error creating threads");
- }
-
- for (int j = 0; j < nr_of_threads; j++) {
- int err = pthread_join(tid[j], NULL);
- if ( err != 0 )
- printf("Error joining threads");
- }
-
- clock_gettime(CLOCK_REALTIME, &p2);
-
- state->p.tv_sec += ( p2.tv_nsec < p1.tv_nsec ? p2.tv_sec - (p1.tv_sec + 1) : p2.tv_sec - p1.tv_sec );
- state->p.tv_nsec += ( p2.tv_nsec < p1.tv_nsec ? ((999999999 - p1.tv_nsec) + p2.tv_nsec) : (p2.tv_nsec - p1.tv_nsec) ) / 1000;
-
-
-}
-
-int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state,
- void *x_buf, void *m_buf,
- void *r_1_buf,
- void *n_buf, void *ni_buf,
- void *msg_buf,
- void *exp_buf,
- // void *mod_buf,
- void *s_buf,
- unsigned long *pks, unsigned long n
- ) {
-
-
-
- modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n);
-
-
- int err; // error code returned from api calls
-
- unsigned long pk = 0;
-
- while (1) {
- if (pks[pk] + 1 >= n)
- break;
- pk++;
- }
-
- unsigned long len = len_in_bytes;
-
- state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
- state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
- state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
- state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
-
- state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, n * sizeof(gpu_register), NULL, NULL);
-
- state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
-
- state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 2), NULL, NULL); // plus 2 because the first index will contain how many elements are in the array
-
- if (!state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem || !state->exp_mem)
- {
- printf("Error: Failed to allocate device memory!\n");
- exit(1);
- }
-
- // Write our data set into the input array in device memory
- //
- err = clEnqueueWriteBuffer(info->commands, state->x_mem, CL_TRUE, 0, len, x_buf, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->m_mem, CL_TRUE, 0, len, m_buf, 0, NULL, NULL);
-
- //err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, len, res, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->n_mem, CL_TRUE, 0, len, n_buf, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->ni_mem, CL_TRUE, 0, len, ni_buf, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0, n * sizeof(gpu_register), exp_buf, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->msg_mem, CL_TRUE, 0, len, msg_buf, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, sizeof(unsigned long), sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long), &pk, 0, NULL, NULL);
-
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to write to source array!\n");
- exit(1);
- }
-
- // Set the arguments to our compute kernel
- //
- err = 0;
- err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->x_mem);
- err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->m_mem);
- err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->n_mem);
- err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->ni_mem);
- err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->exp_mem);
- err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->msg_mem);
- err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->pks_indices);
-
- if (err != CL_SUCCESS)
- {
- printf("RSA-Error: Failed to set kernel arguments! %d\n", err);
- exit(1);
- }
-
- state->total = n;
-
-
- return 0;
-
-}
-
-int modmult_opencl_exec_kernel(struct gpu_info *info, struct gpu_state_alt *state) {
-
- size_t global;
- // size_t local = 1;
- int err;
-
- global = state->total; // has to be exactly the amount of signatures we want to verify
-
- // measure from the first call to the kernel...
- if (state->skip) {
- state->skip = false;
- clock_gettime(CLOCK_REALTIME, &state->t1);
- }
-
- err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
- if (err)
- {
- printf("Error: Failed to execute kernel!\n");
- return EXIT_FAILURE;
- }
-
- return 0;
-
-}
-
-unsigned long modmult_opencl_results(struct gpu_info *info, struct gpu_state_alt *state, bool timed, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) {
-
- if (state->skip) {
- // reset skip in the kernel execution
- return 0;
- }
-
- int err;
-
- // Wait for the command commands to get serviced before reading back results
- //
- err = clFinish(info->commands);
- if (err != CL_SUCCESS)
- {
- printf("Error: Kernel failure! %d\n", err);
- exit(1);
- }
-
- unsigned long results = 0;
-
- // Read back the results from the device to verify the output
- err = clEnqueueReadBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to read output array! %d\n", err);
- exit(1);
- }
-
- if (timed) {
- // stop measuring after the last command has been read
- clock_gettime(CLOCK_REALTIME, &state->t2);
-
- printf("Preparation (on CPU) took \t%ld.%06ld s\n", state->p.tv_sec, state->p.tv_nsec);
-
- long sec = ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec );
- long nanosec = ( state->t2.tv_nsec < state->t1.tv_nsec ? ((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000;
-
- printf("GPU verification took \t%ld.%06ld s\n", sec, nanosec);
-
- printf("Total time: \t\t%ld.%06ld s\n", sec + state->p.tv_sec, nanosec + state->p.tv_nsec);
-
- }
-
-
- return state->total - results;
-
-}
-
-void modmult_opencl_cleanup(struct gpu_info *info) {
-
- clReleaseProgram(info->program);
- clReleaseKernel(info->kernel);
- clReleaseCommandQueue(info->commands);
- clReleaseContext(info->context);
-
-}
-
-void modmult_opencl_release(struct gpu_state_alt *state) {
-
- clReleaseMemObject(state->x_mem);
- clReleaseMemObject(state->m_mem);
- clReleaseMemObject(state->n_mem);
- clReleaseMemObject(state->ni_mem);
- clReleaseMemObject(state->exp_mem);
-
- clReleaseMemObject(state->msg_mem);
-
- clReleaseMemObject(state->pks_indices);
-}
-
-// MARK: for library
-
-void modmult_gpu_init(struct gpu_info *info, struct gpu_state_alt *state) {
-
- info->platform = select_platform(0, false);
- info->device_id = select_device (info->platform);
- info->context = create_compute_context (info->device_id);
- info->commands = create_command_queue (info->device_id, info->context);
- info->program = compile_program (info->device_id, info->context, "montmodmult.cl");
- info->kernel = create_kernel (info->program, "mont");
-
- state->result = 0;
- state->total = 0;
- state->skip = true;
-
- state->p.tv_nsec = 0;
- state->p.tv_sec = 0;
-
- int err = 0;
-
- unsigned long results = 0;
-
- state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to write to source array!\n");
- exit(1);
- }
-
- err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->res_mem);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to set kernel arguments! %d\n", err);
- exit(1);
- }
-}
-
-void modmult_gpu_execute(struct gpu_info *info, struct gpu_state_alt *state,
- void *x_buf, void *m_buf,
- void *r_1_buf,
- void *n_buf, void *ni_buf,
- void *msg_buf,
- void *exp_buf,
- //void *mod_buf,
- void *s_buf,
- unsigned long *pks, unsigned long n) {
-
- modmult_opencl_prepare(info, state,
- x_buf, m_buf,
- r_1_buf,
- n_buf, ni_buf,
- msg_buf, exp_buf, s_buf,
- pks, n
- ); // prepares the next batch of signatures on CPU, naturally blocks until it's finished
-
- state->result += modmult_opencl_results(info, state, false, msg_buf, r_1_buf, n_buf, s_buf, n); // waits for kernel, if it is not ready yet
- modmult_opencl_exec_kernel(info,state); // start kernel (returns immediately)
- modmult_opencl_release(state); // release buffers
-
-
-
-}
-
-unsigned long modmult_gpu_finish(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) {
-
- state->result += modmult_opencl_results(info,state,true,msg_buf,r_1_buf,n_buf, s_buf, n);
-
- modmult_opencl_cleanup(info);
-
- unsigned long res = state->result;
-
- state->result = 0; // reset result
-
- return res;
-
-}
-
-
-int mont_modmult_tests(void) {
-
- unsigned long pairs = number_of_pairs(); // returns an estimation of pairs
-
- unsigned long digit_sz = (BIT_LENGTH / 8) * pairs;
-
- len_in_bytes = digit_sz;
-
- unsigned long arr_sz = pairs * sizeof(unsigned long);
-
- gpu_register *x_buf = malloc(digit_sz);
- gpu_register *m_buf = malloc(digit_sz);
- gpu_register *r_1_buf = malloc(digit_sz);
- gpu_register *n_buf = malloc(digit_sz);
- gpu_register *ni_buf = malloc(digit_sz + pairs);
- gpu_register *msg_buf = malloc(digit_sz);
- gpu_register *s_buf = malloc(digit_sz);
- //gpu_register *mod_buf = malloc(digit_sz);
- gpu_register *exp_buf = malloc(pairs * sizeof(gpu_register));
-
- memset(x_buf, 0, digit_sz);
- memset(m_buf, 0, digit_sz);
- memset(r_1_buf, 0, digit_sz);
- memset(n_buf, 0, digit_sz);
- memset(ni_buf, 0, digit_sz);
- memset(msg_buf, 0, digit_sz);
- memset(s_buf, 0, digit_sz);
- //memset(mod_buf, 0, digit_sz);
- memset(exp_buf, 0, pairs * sizeof(gpu_register));
-
- unsigned long *pks = malloc(arr_sz);
-
- memset(pks, 0, arr_sz);
-
- printf("READING KEYS...\n");
-
- montmodmult_pairs_from_files(x_buf, m_buf,
- r_1_buf,
- n_buf, ni_buf,
- msg_buf,
- exp_buf,
-
- s_buf,
- pks, &pairs);
-
- printf("VERIFYING %lu SIGNATURES...\n", pairs);
-
- struct gpu_info info;
- struct gpu_state_alt state;
-
- modmult_gpu_init(&info, &state);
-
- modmult_gpu_execute(&info, &state,
- x_buf, m_buf,
- r_1_buf,
- n_buf, ni_buf,
- msg_buf,
- exp_buf,
- // mod_buf,
- s_buf,
- pks, pairs);
-
- unsigned long res = modmult_gpu_finish(&info, &state, msg_buf, r_1_buf, n_buf,s_buf, pairs);
-
- if (res == pairs) {
- printf("VERIFICATION RESULT: OK\n\n");
- } else {
- printf("VERIFICATION RESULT: NOT OK!\n");
- printf("At least %lu signatures were invalid.\n\n",state.total - res);
- }
-
- return 0;
-}
diff --git a/source/montmodmult.cl b/source/montmodmult.cl
@@ -1 +0,0 @@
-../xcode/montmodmult.cl
-\ No newline at end of file
diff --git a/source/montmodmult.h b/source/montmodmult.h
@@ -1,20 +0,0 @@
-//
-// montmodmult.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 16.12.2023.
-//
-
-#ifndef montmodmult_h
-#define montmodmult_h
-
-#include <stdio.h>
-#include <stdint.h>
-#include <gmp.h>
-
-int mont_modmult_tests(void);
-
-
-typedef uint64_t gpu_register;
-
-#endif /* montmodmult_h */
diff --git a/source/reference-test.c b/source/reference-test.c
@@ -1,260 +0,0 @@
-//
-// reference-test.c
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 06.12.2023.
-//
-
-#include "reference-test.h"
-
-
-void ref_pairs_from_files(char *bases, unsigned long *b_off,
- char *exponents, unsigned long *e_off,
- char *moduli, unsigned long *m_off,
- char *signatures, unsigned long *s_off,
- unsigned long *pks,
- unsigned long *n) {
-
- FILE *pk;
- FILE *ms;
-
- pk = fopen("lib-gpu-generate/publickey.txt", "r");
- ms = fopen("lib-gpu-generate/msgsig.txt", "r");
-
- if (pk == NULL || ms == NULL) {
- printf("Auxiliary files not found.");
- abort();
- }
-
- fseek (ms, 0, SEEK_END);
- long ms_l = ftell(ms);
- fseek (ms, 0, SEEK_SET);
- char *ms_ptr = malloc(ms_l);
- char *ms_ptr_rest = malloc(ms_l);
- if (ms_ptr || ms_ptr_rest)
- {
- fread (ms_ptr, 1, ms_l, ms);
- memcpy(ms_ptr_rest, ms_ptr, ms_l);
- }
- fclose (ms);
-
- fseek (pk, 0, SEEK_END);
- long pk_l = ftell(pk);
- fseek (pk, 0, SEEK_SET);
- char *pk_ptr = malloc(pk_l);
- char *pk_ptr_rest = malloc(pk_l);
- if (pk_ptr && pk_ptr_rest)
- {
- fread (pk_ptr, 1, pk_l, pk);
- memcpy(pk_ptr_rest, pk_ptr, pk_l);
- }
- fclose (pk);
-
- char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest);
- char* signature = strtok_r(0, "\n", &ms_ptr_rest);
- char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest);
- char* exponent = strtok_r(0, "\n", &pk_ptr_rest);
- char* offs = strtok_r(0, "\n", &pk_ptr_rest);
-
- unsigned long b_offset = 0;
- unsigned long e_offset = 0;
- unsigned long m_offset = 0;
- unsigned long s_offset = 0;
-
- int i = 0;
- int j = 0;
-
- while (modulus != NULL && exponent != NULL && offs != NULL) {
-
- unsigned long n_buf_len = strlen(modulus);
- unsigned long e_buf_len = strlen(exponent);
-
- memcpy(&moduli[m_offset], modulus, n_buf_len);
- memcpy(&exponents[e_offset], exponent, e_buf_len);
-
- m_off[i] = m_offset;
- e_off[i] = e_offset;
-
- m_offset += n_buf_len + 1;
- e_offset += e_buf_len + 1;
-
- pks[i] = atoi(offs);
-
- modulus = strtok_r(0, "\n", &pk_ptr_rest);
- exponent = strtok_r(0, "\n", &pk_ptr_rest);
- offs = strtok_r(0, "\n", &pk_ptr_rest);
-
- i++;
- }
-
- while (message != NULL && signature != NULL) {
-
- unsigned long m_buf_len = strlen(message);
- unsigned long s_buf_len = strlen(signature);
-
- memcpy(&bases[b_offset], message, m_buf_len);
- memcpy(&signatures[s_offset], signature, s_buf_len);
-
- b_off[j] = b_offset;
- s_off[j] = s_offset;
-
- b_offset += m_buf_len + 1;
- s_offset += s_buf_len + 1;
-
- message = strtok_r(0, "\n",&ms_ptr_rest);
- signature = strtok_r(0, "\n",&ms_ptr_rest);
-
- j++;
-
- }
-
- *n = j;
-}
-
-gcry_sexp_t sexp_from_string(char* str, const char *format) {
-
- gcry_sexp_t sexp;
-
- gcry_mpi_t mpi = gcry_mpi_new((int)strlen(str) * 8);
- //size_t scanned = 0;
- gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, str, 0, NULL);
-
- size_t errOff = 0;
- gcry_sexp_build(&sexp,&errOff,format,mpi);
-
- return sexp;
-}
-
-gcry_sexp_t sexp_from_string_key(char* str_1, char* str_2, const char *format) {
-
- gcry_sexp_t sexp;
-
- gcry_mpi_t mpi_1 = gcry_mpi_new((int)strlen(str_1) * 8);
- //size_t scanned = 0;
- gcry_mpi_scan(&mpi_1, GCRYMPI_FMT_HEX, str_1, 0, NULL);
-
- gcry_mpi_t mpi_2 = gcry_mpi_new((int)strlen(str_2) * 8);
- //size_t scanned = 0;
- gcry_mpi_scan(&mpi_2, GCRYMPI_FMT_HEX, str_2, 0, NULL);
-
- size_t errOff = 0;
- gcry_sexp_build(&sexp,&errOff,format,mpi_1,mpi_2);
-
- return sexp;
-}
-
-int reference_tests(void) {
-
- // setup_gcry();
-
- unsigned long pairs = number_of_pairs();
-
- unsigned long str_sz = (2048) * pairs;
-
-
- char *b = malloc(str_sz);
- char *e = malloc(str_sz);
- char *m = malloc(str_sz);
- char *s = malloc(str_sz);
-
- unsigned long *b_off = malloc(str_sz);
- unsigned long *e_off = malloc(str_sz);
- unsigned long *m_off = malloc(str_sz);
- unsigned long *s_off = malloc(str_sz);
-
- memset(b, 0, str_sz);
- memset(e, 0, str_sz);
- memset(m, 0, str_sz);
- memset(s, 0, str_sz);
-
- memset(b_off, 0, str_sz);
- memset(e_off, 0, str_sz);
- memset(m_off, 0, str_sz);
- memset(s_off, 0, str_sz);
-
- unsigned long *pks = malloc(str_sz);
- memset(pks, 0, str_sz);
-
- ref_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks,
- &pairs);
-
- unsigned long pk = 0;
-
- while (1) {
- if (pks[pk] + 1 == pairs)
- break;
- pk++;
- }
-
-
-
- gcry_sexp_t *m_sexps = malloc(pairs * sizeof(gcry_sexp_t));
- gcry_sexp_t *s_sexps = malloc(pairs * sizeof(gcry_sexp_t));
- gcry_sexp_t *key_sexps = malloc((pk + 1) * sizeof(gcry_sexp_t));
-
- for (int i = 0; i < pairs; i++) {
-
- m_sexps[i] = sexp_from_string(&b[b_off[i]], "(data (flags raw) (value %m))"); // message format (for comparison)
-
- s_sexps[i] = sexp_from_string(&s[s_off[i]], "(sig-val (rsa (s %m)))"); // signature format
- }
-
-
- for (int i = 0; i <= pk; i++) {
-
- key_sexps[i] = sexp_from_string_key(&m[m_off[i]], &e[e_off[i]], "(public-key (rsa (n %m) (e %m)))" ); // pub key data
-
- }
-
- unsigned long result = 0;
-
- struct timespec t1, t2;
-
- printf("VERIFYING %lu SIGNATURES...\n", pairs);
-
- clock_gettime(CLOCK_REALTIME, &t1);
-
- pk = 0; // reuse pk
-
- for (int i = 0; i < pairs; i++) {
-
- while (1) {
- if (pks[pk] >= i)
- break;
- pk++;
- }
-
- if ( gcry_pk_verify(s_sexps[i], m_sexps[i], key_sexps[pk]) == 0 )
- result += 1;
-
- }
-
- clock_gettime(CLOCK_REALTIME, &t2);
-
- printf("CPU (Reference) verification took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((999999999 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000);
-
- if (result == pairs) {
- printf("VERIFICATION RESULT: %lu - OK\n\n",result);
- } else {
- printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result);
- }
-
-
- free(b);
- free(e);
- free(m);
- free(s);
-
- free(b_off);
- free(e_off);
- free(m_off);
- free(s_off);
-
- free(pks);
-
- free(m_sexps);
- free(s_sexps);
- free(key_sexps);
-
- return result == pairs ? 1 : 0;
-}
diff --git a/source/reference-test.h b/source/reference-test.h
@@ -1,15 +0,0 @@
-//
-// reference-test.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 06.12.2023.
-//
-
-#ifndef reference_test_h
-#define reference_test_h
-
-#include "util.h"
-
-int reference_tests(void);
-
-#endif /* reference_test_h */
diff --git a/source/rsa-test.c b/source/rsa-test.c
@@ -1,639 +0,0 @@
-/*
- * lib-gpu-verify
- *
- * This software contains code derived from or inspired by the BigDigit library,
- * <http://www.di-mgt.com.au/bigdigits.html>
- * which is distributed under the Mozilla Public License, version 2.0.
- *
- * The original code and modifications made to it are subject to the terms and
- * conditions of the Mozilla Public License, version 2.0. A copy of the
- * MPL license can be obtained at
- * https://www.mozilla.org/en-US/MPL/2.0/.
- *
- * Changes and additions to the original code are as follows:
- * - Copied some functions of the BigDigit library into this file, to convert strings read from files to BigDigit type numbers.
- *
- * Contributors:
- * - Cedric Zwahlen cedric.zwahlen@bfh.ch
- *
- * Please note that this software is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the Mozilla Public License, version 2.0, for the specific language
- * governing permissions and limitations under the License.
- */
-
-#include "rsa-test.h"
-
-#define BITS_PER_DIGIT 32
-
-#define MAX_ALLOC_SIZE 256
-
-#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8)
-
-typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half?
-
-size_t mpSizeof(const DIGIT_T a[], size_t ndigits)
-{
- while(ndigits--)
- {
- if (a[ndigits] != 0)
- return (++ndigits);
- }
- return 0;
-}
-
-volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits)
-{ /* Sets a = 0 */
-
- /* Prevent optimiser ignoring this */
- volatile DIGIT_T optdummy;
- volatile DIGIT_T *p = a;
-
- while (ndigits--)
- a[ndigits] = 0;
-
- optdummy = *p;
- return optdummy;
-}
-
-size_t uiceil(double x)
-/* Returns ceil(x) as a non-negative integer or 0 if x < 0 */
-{
- size_t c;
-
- if (x < 0) return 0;
- c = (size_t)x;
- if ((x - c) > 0.0)
- c++;
-
- return c;
-}
-
-volatile uint8_t zeroise_bytes(volatile void *v, size_t n)
-{ /* Zeroise byte array b and make sure optimiser does not ignore this */
- volatile uint8_t optdummy;
- volatile uint8_t *b = (uint8_t*)v;
- while(n--)
- b[n] = 0;
- optdummy = *b;
- return optdummy;
-}
-
-size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes)
-/* Converts nbytes octets into big digit a of max size ndigits
- Returns actual number of digits set (may be larger than mpSizeof)
-*/
-{
- size_t i;
- int j, k;
- DIGIT_T t;
-
- mpSetZero(a, ndigits);
- //memset(a, 0, ndigits);
-
- /* Read in octets, least significant first */
- /* i counts into big_d, j along c, and k is # bits to shift */
- for (i = 0, j = (int)nbytes - 1; i < ndigits && j >= 0; i++)
- {
- t = 0;
- for (k = 0; j >= 0 && k < BITS_PER_DIGIT; j--, k += 8)
- t |= ((DIGIT_T)c[j]) << k;
- a[i] = t;
- }
-
- return i;
-}
-
-size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s)
-/* Convert a string in hexadecimal format to a big digit.
- Return actual number of digits set (may be larger than mpSizeof).
- Just ignores invalid characters in s.
-*/
-{
-
- uint8_t newdigits[MAX_ALLOC_SIZE*2];
-
- size_t newlen;
- size_t n;
- unsigned long t;
- size_t i, j;
-
- mpSetZero(a, ndigits);
- //memset(&a, 0, ndigits);
-
- /* Create some temp storage for int values */
- n = strlen(s);
- if (0 == n) return 0;
- newlen = uiceil(n * 0.5); /* log(16)/log(256)=0.5 */
- //ALLOC_BYTES(newdigits, newlen);
- memset(&newdigits, 0, newlen);
-
- /* Work through zero-terminated string */
- for (i = 0; s[i]; i++)
- {
- t = s[i];
- if ((t >= '0') && (t <= '9')) t = (t - '0');
- else if ((t >= 'a') && (t <= 'f')) t = (t - 'a' + 10);
- else if ((t >= 'A') && (t <= 'F')) t = (t - 'A' + 10);
- else continue;
- for (j = newlen; j > 0; j--)
- {
- t += (unsigned long)newdigits[j-1] << 4;
- newdigits[j-1] = (unsigned char)(t & 0xFF);
- t >>= 8;
- }
- }
-
- /* Convert bytes to big digits */
- n = mpConvFromOctets(a, ndigits, newdigits, newlen);
-
- memset(&newdigits, 0, newlen);
-
- return n;
-}
-
-// MARK: OPENCL CODE
-
-void opencl_pairs_from_files(void *bases, unsigned long *b_len,
- void *exponents, unsigned long *e_len,
- void *moduli, unsigned long *m_len,
- void *signatures, unsigned long *s_len,
- unsigned long *pks,
- unsigned long *n
- ) {
-
- FILE *pk;
- FILE *ms;
-
- pk = fopen("lib-gpu-generate/publickey.txt", "r");
- ms = fopen("lib-gpu-generate/msgsig.txt", "r");
-
- if (pk == NULL || ms == NULL) {
- printf("Auxiliary files not found.");
- abort();
- }
-
- fseek (ms, 0, SEEK_END);
- long ms_l = ftell(ms);
- fseek (ms, 0, SEEK_SET);
- char *ms_ptr = malloc(ms_l);
- char *ms_ptr_rest = malloc(ms_l);
- if (ms_ptr || ms_ptr_rest)
- {
- fread (ms_ptr, 1, ms_l, ms);
- memcpy(ms_ptr_rest, ms_ptr, ms_l);
- }
- fclose (ms);
-
- fseek (pk, 0, SEEK_END);
- long pk_l = ftell(pk);
- fseek (pk, 0, SEEK_SET);
- char *pk_ptr = malloc(pk_l);
- char *pk_ptr_rest = malloc(pk_l);
- if (pk_ptr && pk_ptr_rest)
- {
- fread (pk_ptr, 1, pk_l, pk);
- memcpy(pk_ptr_rest, pk_ptr, pk_l);
- }
- fclose (pk);
-
- int i = 0;
- int j = 0;
-
- DIGIT_T *bases_t = bases;
- DIGIT_T *exponents_t = exponents;
- DIGIT_T *moduli_t = moduli;
- DIGIT_T *signatures_t = signatures;
-
- int sz = 2048 / sizeof(DIGIT_T);
-
- char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest);
- char* signature = strtok_r(0, "\n", &ms_ptr_rest);
- char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest);
- char* exponent = strtok_r(0, "\n", &pk_ptr_rest);
- char* offs = strtok_r(0, "\n", &pk_ptr_rest);
-
- while (modulus != NULL && exponent != NULL && offs != NULL) {
-
- pks[i] = atoi(offs);
-
- unsigned long n_buf_len = strlen(modulus);
- unsigned long e_buf_len = strlen(exponent);
-
- DIGIT_T exponent_dgt [sz*2];
- DIGIT_T modulus_dgt [sz*2];
-
- mpSetZero(exponent_dgt, sz*2);
- mpSetZero(modulus_dgt, sz*2);
-
- mpConvFromHex(exponent_dgt, e_buf_len, exponent);
- mpConvFromHex(modulus_dgt, n_buf_len, modulus);
-
- unsigned long max_len = 64; // hardcoded for 2048 bit RSA
-
- e_len[i] = (i == 0 ? 0 : e_len[i - 1]) + mpSizeof(exponent_dgt, sz*2);
- m_len[i] = (i == 0 ? 0 : m_len[i - 1]) + max_len;
-
- memcpy(&moduli_t[i == 0 ? 0 : (m_len[i - 1])], modulus_dgt, ( m_len[i] - (i == 0 ? 0 : m_len[i - 1]) ) * sizeof(DIGIT_T));
- memcpy(&exponents_t[i == 0 ? 0 : (e_len[i - 1])], exponent_dgt, ( e_len[i] - (i == 0 ? 0 : e_len[i - 1]) ) * sizeof(DIGIT_T));
-
-
- modulus = strtok_r(0, "\n", &pk_ptr_rest);
- exponent = strtok_r(0, "\n", &pk_ptr_rest);
- offs = strtok_r(0, "\n", &pk_ptr_rest);
-
-
- i++;
- }
-
-
-
- while (message != NULL && signature != NULL) {
-
- unsigned long m_buf_len = strlen(message);
- unsigned long s_buf_len = strlen(signature);
-
- DIGIT_T base_dgt [sz*2]; // temp storage, large enough
- DIGIT_T signature_dgt [sz*2];
-
- mpSetZero(base_dgt, sz*2);
- mpSetZero(signature_dgt, sz*2);
-
- mpConvFromHex(base_dgt, m_buf_len, message);
- mpConvFromHex(signature_dgt, s_buf_len, signature);
-
- unsigned long max_len = 64; // the maximum of DIGIT_T types we need
-
- b_len[j] = (j == 0 ? 0 : b_len[j - 1]) + max_len;
- s_len[j] = (j == 0 ? 0 : s_len[j - 1]) + max_len;
-
- memcpy(&bases_t[j == 0 ? 0 : (b_len[j - 1])], base_dgt, ( b_len[j] - (j == 0 ? 0 : b_len[j - 1]) ) * sizeof(DIGIT_T));
- memcpy(&signatures_t[j == 0 ? 0 : (s_len[j - 1])], signature_dgt, ( s_len[j] - (j == 0 ? 0 : s_len[j - 1]) ) * sizeof(DIGIT_T));
-
- message = strtok_r(0, "\n",&ms_ptr_rest);
- signature = strtok_r(0, "\n",&ms_ptr_rest);
-
- j++;
-
- }
-
- *n = j;
-
-}
-
-int opencl_prepare(struct gpu_info *info, struct gpu_state *state,
- void *bases, unsigned long *b_len,
- void *exponents, unsigned long *e_len,
- void *moduli, unsigned long *m_len,
- void *signatures, unsigned long *s_len,
- const unsigned long *pks,
- const unsigned long n) {
-
- int err; // error code returned from api calls
-
- unsigned long pk = 0;
-
- while (1) {
- if (pks[pk] + 1 >= n)
- break;
- pk++;
- }
-
-
- state->mod_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * m_len[pk], NULL, NULL);
- state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * e_len[pk], NULL, NULL);
-
- state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * s_len[n-1], NULL, NULL);
- state->comp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(DIGIT_T) * b_len[n-1], NULL, NULL); // the base, to compare whether we get the same signature
-
- state->mod_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
- state->exp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL);
-
- state->sig_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
- state->comp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL);
-
- state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL);
-
-
-
-
- if (!state->sig_mem || !state->exp_mem || !state->mod_mem || !state->comp_mem || !state->invalid )
- {
- printf("Error: Failed to allocate device memory!\n");
- exit(1);
- }
-
- // Write our data set into the input array in device memory
- //
- err = clEnqueueWriteBuffer(info->commands, state->sig_mem, CL_TRUE, 0, sizeof(DIGIT_T) * s_len[n-1], signatures, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0,sizeof(DIGIT_T) * e_len[pk], exponents, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->exp_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), e_len, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->mod_mem, CL_TRUE, 0, sizeof(DIGIT_T) * m_len[pk], moduli, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->mod_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), m_len, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->comp_mem, CL_TRUE, 0, sizeof(DIGIT_T) * b_len[n-1], bases, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
- //err |= clEnqueueWriteBuffer(info->commands, state->valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to write to source array!\n");
- exit(1);
- }
-
- // Set the arguments to our compute kernel
- //
- err = 0;
- err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->sig_mem);
- err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->sig_len);
- err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->exp_mem);
- err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->exp_len);
- err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->mod_mem);
- err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->mod_len);
- err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->comp_mem);
- err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->comp_len);
- //err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->valid); -> set somewhere else
- err |= clSetKernelArg(info->kernel, 9, sizeof(cl_mem), &state->pks_indices);
- err |= clSetKernelArg(info->kernel, 10, sizeof(unsigned long), &n);
-
-
- //err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
- if (err != CL_SUCCESS)
- {
- printf("RSA-Error: Failed to set kernel arguments! %d\n", err);
- exit(1);
- }
-
- state->total = n;
-
-
- return 0;
-
-}
-
-int opencl_exec_kernel(struct gpu_info *info, struct gpu_state *state) {
-
- size_t global;
- int err;
-
- global = state->total;
-
- // measure from the first call to the kernel...
- if (state->skip) {
- state->skip = false;
- clock_gettime(CLOCK_REALTIME, &state->t1);
- }
-
- err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
- if (err)
- {
- printf("Error: Failed to execute kernel!\n");
- return EXIT_FAILURE;
- }
-
- //printf("KERNEL IS EXECUTING...\n");
-
- return 0;
-
-}
-
-
-unsigned long opencl_results(struct gpu_info *info, struct gpu_state *state, bool timed) {
-
- if (state->skip) {
- // reset skip in the kernel execution
- return 0;
- }
-
- int err;
-
- unsigned long failed_signatures = 0;
-
-
- // Wait for the command commands to get serviced before reading back results
- //
- err = clFinish(info->commands);
- if (err != CL_SUCCESS)
- {
- printf("Error: Kernel failure! %d\n", err);
- exit(1);
- }
-
- // Read back the results from the device to verify the output
- //
- //err = clEnqueueReadBuffer( commands, res_mem, CL_TRUE, 0, res_len, res_buf, 0, NULL, NULL );
- err = clEnqueueReadBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to read output array! %d\n", err);
- exit(1);
- }
-
-
-
-
-
-
- if (timed) {
- // stop measuring after the last command has been read
- clock_gettime(CLOCK_REALTIME, &state->t2);
-
- printf("GPU verification took %ld.%06ld s\n", ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ), ( state->t2.tv_nsec < state->t1.tv_nsec ? ((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000);
-
- }
-
-
- return state->total - failed_signatures;
-
-}
-
-void opencl_cleanup(struct gpu_info *info) {
-
- clReleaseProgram(info->program);
- clReleaseKernel(info->kernel);
- clReleaseCommandQueue(info->commands);
- clReleaseContext(info->context);
-
-}
-
-void opencl_release(struct gpu_state *state) {
-
- clReleaseMemObject(state->comp_mem);
- clReleaseMemObject(state->exp_mem);
- clReleaseMemObject(state->mod_mem);
- clReleaseMemObject(state->sig_mem);
-
- clReleaseMemObject(state->comp_len);
- clReleaseMemObject(state->exp_len);
- clReleaseMemObject(state->mod_len);
- clReleaseMemObject(state->sig_len);
-
- clReleaseMemObject(state->pks_indices);
-
-
-
-}
-
-// MARK: for library
-
-void gpu_init(struct gpu_info *info, struct gpu_state *state) {
-
- info->platform = select_platform(0, false);
- info->device_id = select_device (info->platform);
- info->context = create_compute_context (info->device_id);
- info->commands = create_command_queue (info->device_id, info->context);
- info->program = compile_program (info->device_id, info->context, "verify.cl");
- info->kernel = create_kernel (info->program, "several");
-
- state->result = 0;
- state->total = 0;
- state->skip = true;
-
-
- int err = 0;
-
- int failed_signatures = 0;
-
- state->invalid = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL);
-
- err |= clEnqueueWriteBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to write to source array!\n");
- exit(1);
- }
-
- err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->invalid);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to set kernel arguments! %d\n", err);
- exit(1);
- }
-}
-
-void gpu_execute(struct gpu_info *info,
- struct gpu_state *state,
- void *bases, unsigned long *b_len,
- void *exponents, unsigned long *e_len,
- void *moduli, unsigned long *m_len,
- void *signatures, unsigned long *s_len,
- const unsigned long *pks,
- const unsigned long n) {
-
- opencl_prepare(info, state, bases, b_len,
- exponents, e_len,
- moduli, m_len,
- signatures, s_len,
- pks, n); // prepares the next batch of signatures on CPU, naturally blocks until it's finished
- state->result += opencl_results(info, state, false); // waits for kernel, if it is not ready yet
- opencl_exec_kernel(info,state); // start kernel (returns immediately)
- opencl_release(state); // release buffers
-
-
-
-}
-
-unsigned long gpu_finish(struct gpu_info *info, struct gpu_state *state) {
-
- state->result += opencl_results(info,state,true);
-
- opencl_cleanup(info);
- clReleaseMemObject(state->invalid);
-
- unsigned long res = state->result;
-
- state->result = 0; // reset result
-
- return res;
-
-}
-
-// MARK: function to know how much storage the gpu has to split data
-
-
-
-//size_t retSize_3 = sizeof(cl_ulong);
-//cl_ulong max_stor = 0;
-//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, 0, NULL, &retSize_3);
-//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, retSize_3, &max_stor, &retSize_3);
-
-//printf("max memory: %llu\n", max_stor);
-
-
-int rsa_tests(void) {
-
- //setup_gcry();
-
- unsigned long pairs = number_of_pairs(); // returns an estimation of pairs
-
- unsigned long digit_sz = 64 * pairs * sizeof(DIGIT_T);
- unsigned long arr_sz = pairs * sizeof(unsigned long);
-
- DIGIT_T *q = malloc(digit_sz);
- DIGIT_T *r = malloc(digit_sz);
- DIGIT_T *s = malloc(digit_sz);
- DIGIT_T *t = malloc(digit_sz);
-
- memset(q, 0, digit_sz);
- memset(r, 0, digit_sz);
- memset(s, 0, digit_sz);
- memset(t, 0, digit_sz);
-
- unsigned long *u = malloc(arr_sz);
- unsigned long *v = malloc(arr_sz);
- unsigned long *w = malloc(arr_sz);
- unsigned long *x = malloc(arr_sz);
-
- unsigned long *pks = malloc(arr_sz);
-
- memset(u, 0, arr_sz);
- memset(v, 0, arr_sz);
- memset(w, 0, arr_sz);
- memset(x, 0, arr_sz);
- memset(pks, 0, arr_sz);
-
- printf("READING KEYS...\n");
-
- opencl_pairs_from_files(q, u,
- r, v,
- s, w,
- t, x, pks, &pairs); // this returns the actual amount of pairs
-
-
-
- printf("VERIFYING %lu SIGNATURES...\n", pairs);
-
- struct gpu_info info;
- struct gpu_state state;
-
- gpu_init(&info, &state);
-
- gpu_execute(&info, &state, q, u, r, v, s, w, t, x, pks, pairs);
-
- unsigned long res = gpu_finish(&info, &state);
-
- if (res == pairs) {
- printf("VERIFICATION RESULT: %lu - OK\n\n",res);
- } else {
- printf("VERIFICATION RESULT: %lu - NOT OK\n\n",res);
- }
-
-
- free(q);
- free(r);
- free(s);
- free(t);
-
- free(u);
- free(v);
- free(w);
- free(x);
-
- free(pks);
-
- return 0;
-}
-
diff --git a/source/rsa-test.h b/source/rsa-test.h
@@ -1,33 +0,0 @@
-//
-// rsa-test.h
-// lib-gpu-verify
-//
-// Created by Cedric Zwahlen on 28.09.2023.
-//
-
-#ifndef rsa_test_h
-#define rsa_test_h
-
-#include "util.h"
-
-#include "ctype.h"
-
-
-int rsa_tests(void);
-
-// MARK: put in seperate file, and rename this one
-
-void gpu_init(struct gpu_info *info, struct gpu_state *state);
-
-void gpu_execute(struct gpu_info *info,
- struct gpu_state *state,
- void *bases, unsigned long *b_len,
- void *exponents, unsigned long *e_len,
- void *moduli, unsigned long *m_len,
- void *signatures, unsigned long *s_len,
- const unsigned long *pks,
- const unsigned long n);
-
-unsigned long gpu_finish(struct gpu_info *info, struct gpu_state *state);
-
-#endif /* rsa_test_h */
diff --git a/source/util.c b/source/util.c
@@ -7,7 +7,7 @@
#include "util.h"
-unsigned long number_of_pairs(void) {
+unsigned long gpuv_estimate_pairs(void) {
struct stat ss;
@@ -34,7 +34,7 @@ unsigned long number_of_pairs(void) {
}
-void setup_gcry(void) {
+void gpuv_prepare_gcry(void) {
/* Version check should be the very first call because it
diff --git a/source/util.h b/source/util.h
@@ -28,16 +28,11 @@
#include <CL/opencl.h>
#endif
-
-
-
-
#define NEED_LIBGCRYPT_VERSION "1.9.4"
-unsigned long number_of_pairs(void);
-
-void setup_gcry(void);
+unsigned long gpuv_estimate_pairs(void);
+void gpuv_prepare_gcry(void);
cl_platform_id select_platform (unsigned int offset, bool print_platforms);
diff --git a/xcode/.DS_Store b/xcode/.DS_Store
Binary files differ.
diff --git a/xcode/montmodmult.cl b/xcode/gpuv-montg.cl
diff --git a/xcode/verify.cl b/xcode/gpuv.cl
diff --git a/xcode/lib-gpu-generate/main.c b/xcode/lib-gpu-generate/main.c
@@ -16,7 +16,7 @@
#define NEED_LIBGCRYPT_VERSION "1.10.1"
-void setup_gcry(void) {
+void gpuv_prepare_gcry(void) {
gcry_control (GCRYCTL_SET_THREAD_CBS, 0);
@@ -198,7 +198,7 @@ int main(int argc, const char * argv[]) {
printf("generating %lu signatures with %i keys.\n",n * pks, pks);
}
- setup_gcry();
+ gpuv_prepare_gcry();
pthread_t tid[pks];
// not the best, but it is safe
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj
@@ -7,15 +7,14 @@
objects = {
/* Begin PBXBuildFile section */
- 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A36F8882B0F938E00AB772D /* montgomery.cl */; };
- 6A8A795F2A89672700116D7D /* verify.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* verify.cl */; };
+ 6A8A795F2A89672700116D7D /* gpuv.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* gpuv.cl */; };
6AA38E5B2B0A97FC00E85243 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AA38E5A2B0A97FC00E85243 /* main.c */; };
6ABC2E842B231DFF00033B90 /* util.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E832B231DFF00033B90 /* util.c */; };
- 6ABC2E882B231E3D00033B90 /* reference-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E862B231E3D00033B90 /* reference-test.c */; };
- 6AC553252B2E174900046AB7 /* montmodmult.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553242B2E174900046AB7 /* montmodmult.cl */; };
- 6AC553292B2E17C800046AB7 /* montmodmult.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553282B2E17C800046AB7 /* montmodmult.c */; };
+ 6ABC2E882B231E3D00033B90 /* gpuv-ref.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */; };
+ 6AC553252B2E174900046AB7 /* gpuv-montg.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553242B2E174900046AB7 /* gpuv-montg.cl */; };
+ 6AC553292B2E17C800046AB7 /* gpuv-montg.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553282B2E17C800046AB7 /* gpuv-montg.c */; };
6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */; };
- 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* rsa-test.c */; };
+ 6AF748832ADADF4500D58E08 /* gpuv.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* gpuv.c */; };
C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C3770EFC0E6F1138009A5A77 /* OpenCL.framework */; };
/* End PBXBuildFile section */
@@ -42,27 +41,20 @@
/* Begin PBXFileReference section */
466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-verify"; sourceTree = BUILT_PRODUCTS_DIR; };
- 6A36F8882B0F938E00AB772D /* montgomery.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = montgomery.cl; sourceTree = "<group>"; };
- 6A7914CC2B0CF320001EDCC1 /* montgomery.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = montgomery.h; path = ../source/montgomery.h; sourceTree = "<group>"; };
- 6A7914CD2B0CF320001EDCC1 /* montgomery.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = montgomery.c; path = ../source/montgomery.c; sourceTree = "<group>"; };
- 6A8A795E2A89672700116D7D /* verify.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = verify.cl; sourceTree = "<group>"; };
+ 6A8A795E2A89672700116D7D /* gpuv.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = gpuv.cl; sourceTree = "<group>"; };
6AA38E582B0A97FC00E85243 /* lib-gpu-generate */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-generate"; sourceTree = BUILT_PRODUCTS_DIR; };
6AA38E5A2B0A97FC00E85243 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
6AA38E612B0A9B2100E85243 /* lib-gpu-generate.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "lib-gpu-generate.entitlements"; sourceTree = "<group>"; };
- 6AB4D99B2B1645F900A686F2 /* montgomery-test.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "montgomery-test.h"; path = "../source/montgomery-test.h"; sourceTree = "<group>"; };
- 6AB4D99C2B1645F900A686F2 /* montgomery-test.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "montgomery-test.c"; path = "../source/montgomery-test.c"; sourceTree = "<group>"; };
6ABC2E832B231DFF00033B90 /* util.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = util.c; path = ../source/util.c; sourceTree = "<group>"; };
6ABC2E852B231E0400033B90 /* util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = util.h; path = ../source/util.h; sourceTree = "<group>"; };
- 6ABC2E862B231E3D00033B90 /* reference-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "reference-test.c"; path = "../source/reference-test.c"; sourceTree = "<group>"; };
- 6ABC2E872B231E3D00033B90 /* reference-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "reference-test.h"; path = "../source/reference-test.h"; sourceTree = "<group>"; };
- 6AC553242B2E174900046AB7 /* montmodmult.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = montmodmult.cl; sourceTree = "<group>"; };
- 6AC553272B2E17C800046AB7 /* montmodmult.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = montmodmult.h; path = ../source/montmodmult.h; sourceTree = "<group>"; };
- 6AC553282B2E17C800046AB7 /* montmodmult.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = montmodmult.c; path = ../source/montmodmult.c; sourceTree = "<group>"; };
- 6AC5532A2B2E885200046AB7 /* gmp.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gmp.c; path = ../source/gmp.c; sourceTree = "<group>"; };
- 6AC5532C2B2E889100046AB7 /* gmp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = gmp.h; path = ../source/gmp.h; sourceTree = "<group>"; };
+ 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "gpuv-ref.c"; path = "../source/gpuv-ref.c"; sourceTree = "<group>"; };
+ 6ABC2E872B231E3D00033B90 /* gpuv-ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "gpuv-ref.h"; path = "../source/gpuv-ref.h"; sourceTree = "<group>"; };
+ 6AC553242B2E174900046AB7 /* gpuv-montg.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = "gpuv-montg.cl"; sourceTree = "<group>"; };
+ 6AC553272B2E17C800046AB7 /* gpuv-montg.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "gpuv-montg.h"; path = "../source/gpuv-montg.h"; sourceTree = "<group>"; };
+ 6AC553282B2E17C800046AB7 /* gpuv-montg.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "gpuv-montg.c"; path = "../source/gpuv-montg.c"; sourceTree = "<group>"; };
6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "lib-gpu-verify.c"; path = "../source/lib-gpu-verify.c"; sourceTree = "<group>"; };
- 6AF7487F2ADADF4500D58E08 /* rsa-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "rsa-test.c"; path = "../source/rsa-test.c"; sourceTree = "<group>"; };
- 6AF748802ADADF4500D58E08 /* rsa-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "rsa-test.h"; path = "../source/rsa-test.h"; sourceTree = "<group>"; };
+ 6AF7487F2ADADF4500D58E08 /* gpuv.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gpuv.c; path = ../source/gpuv.c; sourceTree = "<group>"; };
+ 6AF748802ADADF4500D58E08 /* gpuv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gpuv.h; path = ../source/gpuv.h; sourceTree = "<group>"; };
C3770EFC0E6F1138009A5A77 /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; };
/* End PBXFileReference section */
@@ -107,12 +99,10 @@
6A984F162AC5B18A00F530FD /* Headers */ = {
isa = PBXGroup;
children = (
- 6AF748802ADADF4500D58E08 /* rsa-test.h */,
- 6A7914CC2B0CF320001EDCC1 /* montgomery.h */,
- 6AC5532C2B2E889100046AB7 /* gmp.h */,
+ 6AF748802ADADF4500D58E08 /* gpuv.h */,
6ABC2E852B231E0400033B90 /* util.h */,
- 6ABC2E872B231E3D00033B90 /* reference-test.h */,
- 6AB4D99B2B1645F900A686F2 /* montgomery-test.h */,
+ 6ABC2E872B231E3D00033B90 /* gpuv-ref.h */,
+ 6AC553272B2E17C800046AB7 /* gpuv-montg.h */,
);
name = Headers;
sourceTree = "<group>";
@@ -126,30 +116,17 @@
path = "lib-gpu-generate";
sourceTree = "<group>";
};
- 6AC553262B2E175500046AB7 /* montmodmult */ = {
- isa = PBXGroup;
- children = (
- 6AC553242B2E174900046AB7 /* montmodmult.cl */,
- 6AC553272B2E17C800046AB7 /* montmodmult.h */,
- 6AC553282B2E17C800046AB7 /* montmodmult.c */,
- );
- name = montmodmult;
- sourceTree = "<group>";
- };
C3770EF10E6F10BB009A5A77 /* Sources */ = {
isa = PBXGroup;
children = (
6A984F162AC5B18A00F530FD /* Headers */,
- 6A8A795E2A89672700116D7D /* verify.cl */,
- 6A36F8882B0F938E00AB772D /* montgomery.cl */,
- 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */,
- 6AF7487F2ADADF4500D58E08 /* rsa-test.c */,
- 6AC5532A2B2E885200046AB7 /* gmp.c */,
- 6A7914CD2B0CF320001EDCC1 /* montgomery.c */,
- 6AB4D99C2B1645F900A686F2 /* montgomery-test.c */,
+ 6A8A795E2A89672700116D7D /* gpuv.cl */,
+ 6AC553242B2E174900046AB7 /* gpuv-montg.cl */,
6ABC2E832B231DFF00033B90 /* util.c */,
- 6ABC2E862B231E3D00033B90 /* reference-test.c */,
- 6AC553262B2E175500046AB7 /* montmodmult */,
+ 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */,
+ 6AF7487F2ADADF4500D58E08 /* gpuv.c */,
+ 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */,
+ 6AC553282B2E17C800046AB7 /* gpuv-montg.c */,
);
name = Sources;
sourceTree = "<group>";
@@ -239,14 +216,13 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
- 6ABC2E882B231E3D00033B90 /* reference-test.c in Sources */,
- 6AC553252B2E174900046AB7 /* montmodmult.cl in Sources */,
- 6AC553292B2E17C800046AB7 /* montmodmult.c in Sources */,
+ 6ABC2E882B231E3D00033B90 /* gpuv-ref.c in Sources */,
+ 6AC553252B2E174900046AB7 /* gpuv-montg.cl in Sources */,
+ 6AC553292B2E17C800046AB7 /* gpuv-montg.c in Sources */,
6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */,
- 6A8A795F2A89672700116D7D /* verify.cl in Sources */,
+ 6A8A795F2A89672700116D7D /* gpuv.cl in Sources */,
6ABC2E842B231DFF00033B90 /* util.c in Sources */,
- 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */,
- 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */,
+ 6AF748832ADADF4500D58E08 /* gpuv.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate
Binary files differ.
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
@@ -1199,9 +1199,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "607"
- endingLineNumber = "607"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "594"
+ endingLineNumber = "594"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -1427,9 +1427,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "631"
- endingLineNumber = "631"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "618"
+ endingLineNumber = "618"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -1520,9 +1520,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "633"
- endingLineNumber = "633"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "620"
+ endingLineNumber = "620"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -2518,9 +2518,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "638"
- endingLineNumber = "638"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "625"
+ endingLineNumber = "625"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -2626,9 +2626,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "570"
- endingLineNumber = "570"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "557"
+ endingLineNumber = "557"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -2721,9 +2721,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "636"
- endingLineNumber = "636"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "623"
+ endingLineNumber = "623"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -2784,9 +2784,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "604"
- endingLineNumber = "604"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "591"
+ endingLineNumber = "591"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -2847,9 +2847,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "596"
- endingLineNumber = "596"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "583"
+ endingLineNumber = "583"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -2863,9 +2863,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "629"
- endingLineNumber = "629"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "616"
+ endingLineNumber = "616"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
<Locations>
<Location
@@ -2911,9 +2911,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "583"
- endingLineNumber = "583"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "570"
+ endingLineNumber = "570"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -2927,9 +2927,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "624"
- endingLineNumber = "624"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "611"
+ endingLineNumber = "611"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -2943,8 +2943,8 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "374"
- endingLineNumber = "374"
+ startingLineNumber = "370"
+ endingLineNumber = "370"
landmarkName = "opencl_prepare(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3007,9 +3007,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "618"
- endingLineNumber = "618"
- landmarkName = "rsa_tests()"
+ startingLineNumber = "605"
+ endingLineNumber = "605"
+ landmarkName = "gpuv_test()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -3023,9 +3023,9 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "531"
- endingLineNumber = "531"
- landmarkName = "gpu_execute(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)"
+ startingLineNumber = "520"
+ endingLineNumber = "520"
+ landmarkName = "gpuv_execute(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -3039,8 +3039,8 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "449"
- endingLineNumber = "449"
+ startingLineNumber = "438"
+ endingLineNumber = "438"
landmarkName = "opencl_results(info, state, timed)"
landmarkType = "9">
<Locations>
@@ -3147,8 +3147,8 @@
filePath = "../source/rsa-test.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "390"
- endingLineNumber = "390"
+ startingLineNumber = "386"
+ endingLineNumber = "386"
landmarkName = "opencl_exec_kernel(info, state)"
landmarkType = "9">
</BreakpointContent>
@@ -3163,8 +3163,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "22"
- endingLineNumber = "22"
+ startingLineNumber = "21"
+ endingLineNumber = "21"
landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3179,8 +3179,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "93"
- endingLineNumber = "93"
+ startingLineNumber = "91"
+ endingLineNumber = "91"
landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -3227,9 +3227,9 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "596"
- endingLineNumber = "596"
- landmarkName = "mont_modmult_tests()"
+ startingLineNumber = "591"
+ endingLineNumber = "591"
+ landmarkName = "gpuv_test_montg()"
landmarkType = "9">
<Locations>
<Location
@@ -3275,9 +3275,9 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "646"
- endingLineNumber = "646"
- landmarkName = "mont_modmult_tests()"
+ startingLineNumber = "641"
+ endingLineNumber = "641"
+ landmarkName = "gpuv_test_montg()"
landmarkType = "9">
<Locations>
<Location
@@ -3398,8 +3398,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "350"
- endingLineNumber = "350"
+ startingLineNumber = "346"
+ endingLineNumber = "346"
landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -3446,8 +3446,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "340"
- endingLineNumber = "340"
+ startingLineNumber = "336"
+ endingLineNumber = "336"
landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3462,8 +3462,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "477"
- endingLineNumber = "477"
+ startingLineNumber = "473"
+ endingLineNumber = "473"
landmarkName = "modmult_opencl_results(info, state, timed, msg_buf, r_1_buf, n_buf, s_buf, n)"
landmarkType = "9">
<Locations>
@@ -3525,8 +3525,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "352"
- endingLineNumber = "352"
+ startingLineNumber = "348"
+ endingLineNumber = "348"
landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -3573,8 +3573,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "428"
- endingLineNumber = "428"
+ startingLineNumber = "424"
+ endingLineNumber = "424"
landmarkName = "modmult_opencl_exec_kernel(info, state)"
landmarkType = "9">
<Locations>
@@ -3621,8 +3621,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "425"
- endingLineNumber = "425"
+ startingLineNumber = "421"
+ endingLineNumber = "421"
landmarkName = "modmult_opencl_exec_kernel(info, state)"
landmarkType = "9">
<Locations>
@@ -3714,8 +3714,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "73"
- endingLineNumber = "73"
+ startingLineNumber = "71"
+ endingLineNumber = "71"
landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3730,9 +3730,9 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "571"
- endingLineNumber = "571"
- landmarkName = "modmult_gpu_execute(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
+ startingLineNumber = "566"
+ endingLineNumber = "566"
+ landmarkName = "gpuv_execute_montg(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
<Location
@@ -3778,8 +3778,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "372"
- endingLineNumber = "372"
+ startingLineNumber = "368"
+ endingLineNumber = "368"
landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3794,8 +3794,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "460"
- endingLineNumber = "460"
+ startingLineNumber = "456"
+ endingLineNumber = "456"
landmarkName = "modmult_opencl_results(info, state, timed, msg_buf, r_1_buf, n_buf, s_buf, n)"
landmarkType = "9">
<Locations>
@@ -3903,8 +3903,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "89"
- endingLineNumber = "89"
+ startingLineNumber = "87"
+ endingLineNumber = "87"
landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3919,8 +3919,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "82"
- endingLineNumber = "82"
+ startingLineNumber = "80"
+ endingLineNumber = "80"
landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -3935,9 +3935,9 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "639"
- endingLineNumber = "639"
- landmarkName = "mont_modmult_tests()"
+ startingLineNumber = "634"
+ endingLineNumber = "634"
+ landmarkName = "gpuv_test_montg()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -3951,9 +3951,9 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "600"
- endingLineNumber = "600"
- landmarkName = "mont_modmult_tests()"
+ startingLineNumber = "595"
+ endingLineNumber = "595"
+ landmarkName = "gpuv_test_montg()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -3969,7 +3969,7 @@
endingColumnNumber = "9223372036854775807"
startingLineNumber = "33"
endingLineNumber = "33"
- landmarkName = "number_of_pairs()"
+ landmarkName = "gpuv_estimate_pairs()"
landmarkType = "9">
</BreakpointContent>
</BreakpointProxy>
@@ -3983,8 +3983,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "421"
- endingLineNumber = "421"
+ startingLineNumber = "417"
+ endingLineNumber = "417"
landmarkName = "modmult_opencl_exec_kernel(info, state)"
landmarkType = "9">
</BreakpointContent>
@@ -3993,14 +3993,14 @@
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
uuid = "2E10F90D-6276-4B90-B2B1-8111E48FD074"
- shouldBeEnabled = "Yes"
+ shouldBeEnabled = "No"
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "318"
- endingLineNumber = "318"
+ startingLineNumber = "315"
+ endingLineNumber = "315"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4154,6 +4154,21 @@
endingLineNumber = "312"
offsetFromSymbolStart = "940">
</Location>
+ <Location
+ uuid = "2E10F90D-6276-4B90-B2B1-8111E48FD074 - 6fe7da94a4c07cf2"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "modmult_opencl_convert"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/montmodmult.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "315"
+ endingLineNumber = "315"
+ offsetFromSymbolStart = "954">
+ </Location>
</Locations>
</BreakpointContent>
</BreakpointProxy>
@@ -4167,8 +4182,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "270"
- endingLineNumber = "270"
+ startingLineNumber = "267"
+ endingLineNumber = "267"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -4183,8 +4198,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "283"
- endingLineNumber = "283"
+ startingLineNumber = "280"
+ endingLineNumber = "280"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4231,8 +4246,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "290"
- endingLineNumber = "290"
+ startingLineNumber = "287"
+ endingLineNumber = "287"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -4241,14 +4256,14 @@
BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
<BreakpointContent
uuid = "2A4E0B7E-B255-4271-B259-62B34ABE9D3E"
- shouldBeEnabled = "Yes"
+ shouldBeEnabled = "No"
ignoreCount = "0"
continueAfterRunningActions = "No"
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "293"
- endingLineNumber = "293"
+ startingLineNumber = "290"
+ endingLineNumber = "290"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4357,6 +4372,21 @@
endingLineNumber = "288"
offsetFromSymbolStart = "477">
</Location>
+ <Location
+ uuid = "2A4E0B7E-B255-4271-B259-62B34ABE9D3E - 6fe7da94a4c07bca"
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ symbolName = "modmult_opencl_convert"
+ moduleName = "lib-gpu-verify"
+ usesParentBreakpointCondition = "Yes"
+ urlString = "file:///Users/cedriczwahlen/libgpuverify/source/montmodmult.c"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "291"
+ endingLineNumber = "291"
+ offsetFromSymbolStart = "491">
+ </Location>
</Locations>
</BreakpointContent>
</BreakpointProxy>
@@ -4370,8 +4400,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "310"
- endingLineNumber = "310"
+ startingLineNumber = "307"
+ endingLineNumber = "307"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4418,8 +4448,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "305"
- endingLineNumber = "305"
+ startingLineNumber = "302"
+ endingLineNumber = "302"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -4434,8 +4464,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "311"
- endingLineNumber = "311"
+ startingLineNumber = "308"
+ endingLineNumber = "308"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4482,8 +4512,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "312"
- endingLineNumber = "312"
+ startingLineNumber = "309"
+ endingLineNumber = "309"
landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4530,8 +4560,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "155"
- endingLineNumber = "155"
+ startingLineNumber = "153"
+ endingLineNumber = "153"
landmarkName = "convert_thread(vargp)"
landmarkType = "9">
<Locations>
@@ -4578,8 +4608,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "194"
- endingLineNumber = "194"
+ startingLineNumber = "192"
+ endingLineNumber = "192"
landmarkName = "convert_thread(vargp)"
landmarkType = "9">
</BreakpointContent>
diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl
@@ -1,2954 +0,0 @@
-/* this kernel contains code of the mini-gmp, a minimalistic implementation of a GNU GMP subset.
-
- Contributed to the GNU project by Niels Möller
- Additional functionalities and improvements by Marco Bodrato.
-
- Changes and additions for this kernel by Cedric Zwahlen
-
-Copyright 1991-1997, 1999-2022 Free Software Foundation, Inc.
-
-This file contains code that is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
-
- */
-
-
-#ifndef MINI_GMP_LIMB_TYPE
-#define MINI_GMP_LIMB_TYPE long
-#endif
-
-
-#define ULONG_MAX_gpu 0xFFFFFFFFUL
-
-#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
-
-#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
-#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
-
-#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
-#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)
-
-#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
-#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))
-
-#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
-#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
-
-#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
-#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))
-
-#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \
- ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
-
-
-#define gmp_clz(count, x) do { \
- mp_limb_t __clz_x = (x); \
- unsigned __clz_c = 0; \
- int LOCAL_SHIFT_BITS = 8; \
- if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \
- for (; \
- (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
- __clz_c += 8) \
- { __clz_x <<= LOCAL_SHIFT_BITS; } \
- for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \
- __clz_x <<= 1; \
- (count) = __clz_c; \
- } while (0)
-
-#define gmp_umullo_limb(u, v) \
- ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v))
-
-#define gmp_umul_ppmm(w1, w0, u, v) \
- do { \
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \
- if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned int __ww = (unsigned int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \
- { \
- unsigned long int __ww = (unsigned long int) (u) * (v); \
- w0 = (mp_limb_t) __ww; \
- w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
- } \
- else { \
- mp_limb_t __x0, __x1, __x2, __x3; \
- unsigned __ul, __vl, __uh, __vh; \
- mp_limb_t __u = (u), __v = (v); \
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \
- \
- __ul = __u & GMP_LLIMB_MASK; \
- __uh = __u >> (GMP_LIMB_BITS / 2); \
- __vl = __v & GMP_LLIMB_MASK; \
- __vh = __v >> (GMP_LIMB_BITS / 2); \
- \
- __x0 = (mp_limb_t) __ul * __vl; \
- __x1 = (mp_limb_t) __ul * __vh; \
- __x2 = (mp_limb_t) __uh * __vl; \
- __x3 = (mp_limb_t) __uh * __vh; \
- \
- __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
- __x1 += __x2; /* but this indeed can */ \
- if (__x1 < __x2) /* did we get it? */ \
- __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \
- \
- (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
- (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
- } \
- } while (0)
-
-#define gmp_assert_nocarry(x) do { \
- mp_limb_t __cy = (x); \
- assert (__cy == 0); \
- (void) (__cy); \
- } while (0)
-
-#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) + (bl); \
- (sh) = (ah) + (bh) + (__x < (al)); \
- (sl) = __x; \
- } while (0)
-
-#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
- do { \
- mp_limb_t __x; \
- __x = (al) - (bl); \
- (sh) = (ah) - (bh) - ((al) < (bl)); \
- (sl) = __x; \
- } while (0)
-
-
-#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _qh, _ql, _r, _mask; \
- gmp_umul_ppmm (_qh, _ql, (nh), (di)); \
- gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - gmp_umullo_limb (_qh, (d)); \
- _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
- _qh += _mask; \
- _r += _mask & (d); \
- if (_r >= (d)) \
- { \
- _r -= (d); \
- _qh++; \
- } \
- \
- (r) = _r; \
- (q) = _qh; \
- } while (0)
-
-#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
- do { \
- mp_limb_t _q0, _t1, _t0, _mask; \
- gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \
- gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
- \
- /* Compute the two most significant limbs of n - q'd */ \
- (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
- gmp_umul_ppmm (_t1, _t0, (d0), (q)); \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
- (q)++; \
- \
- /* Conditionally adjust q and the remainders */ \
- _mask = - (mp_limb_t) ((r1) >= _q0); \
- (q) += _mask; \
- gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
- if ((r1) >= (d1)) \
- { \
- if ((r1) > (d1) || (r0) >= (d0)) \
- { \
- (q)++; \
- gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
- } \
- } \
- } while (0)
-
-#define gmp_ctz(count, x) do { \
- mp_limb_t __ctz_x = (x); \
- unsigned __ctz_c = 0; \
- gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \
- (count) = GMP_LIMB_BITS - 1 - __ctz_c; \
- } while (0)
-
-
-#define MPZ_SRCPTR_SWAP(x, y) \
- do { \
- mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_srcptr_swap__tmp; \
- } while (0)
-
-#define MP_SIZE_T_SWAP(x, y) \
- do { \
- mp_size_t __mp_size_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_size_t_swap__tmp; \
- } while (0)
-
-#define MPZ_PTR_SWAP(x, y) \
- do { \
- mpz_ptr __mpz_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_ptr_swap__tmp; \
- } while (0)
-
-#define MP_BITCNT_T_SWAP(x,y) \
- do { \
- mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_bitcnt_t_swap__tmp; \
- } while (0)
-
-
-#define assert(x){if((x)==0){printf((char __constant *)"assert reached\n");}}
-
-#define NULL ((void*)0)
-
-typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t;
-typedef long mp_size_t;
-typedef unsigned long mp_bitcnt_t;
-
-typedef mp_limb_t *mp_ptr;
-typedef const mp_limb_t *mp_srcptr;
-
-typedef struct
-{
- int _mp_alloc; /* Number of *limbs* allocated and pointed
- to by the _mp_d field. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- //mp_limb_t *_mp_d; /* Pointer to the limbs. */
-
- mp_limb_t _mp_d[256];
-
-} __mpz_struct;
-
-typedef __mpz_struct mpz_t[1];
-
-typedef __mpz_struct *mpz_ptr;
-
-typedef const __mpz_struct *mpz_srcptr;
-
-struct gmp_div_inverse
-{
- /* Normalization shift count. */
- unsigned shift;
- /* Normalized divisor (d0 unused for mpn_div_qr_1) */
- mp_limb_t d1, d0;
- /* Inverse, for 2/1 or 3/2. */
- mp_limb_t di;
-};
-
-
-struct mpn_base_info
-{
- /* bb is the largest power of the base which fits in one limb, and
- exp is the corresponding exponent. */
- unsigned exp;
- mp_limb_t bb;
-};
-
-
-enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
-
-void mpz_init (mpz_t r);
-void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n);
-void mpz_set (mpz_t r, const mpz_t x);
-void
-mpz_set (mpz_t r, const mpz_t x);
-void
-mpz_set_ui (mpz_t r, unsigned long int x);
-void
-mpz_set_si (mpz_t r, signed long int x);
-void
-mpz_init_set_si (mpz_t r, signed long int x);
-void
-mpz_init_set (mpz_t r, const mpz_t x);
-void
-mpz_init2 (mpz_t r, mp_bitcnt_t bits);
-void
-mpz_init_set_ui (mpz_t r, unsigned long int x);
-void
-mpz_clear (mpz_t r);
-void
-gmp_die (const char *msg);
-
-
-mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n);
-void
-mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b);
-void
-mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b);
-void
-mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b);
-int
-mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un);
-unsigned long int
-mpz_get_ui (const mpz_t u);
-int
-mpz_cmpabs_ui (const mpz_t u, unsigned long v);
-mp_limb_t
-mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b);
-mp_limb_t
-mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-mp_limb_t
-mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_limb_t
-mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0);
-int
-mpz_div_qr (mpz_t q, mpz_t r,
- const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode);
-void
-mpz_mod (mpz_t r, const mpz_t n, const mpz_t d);
-void
-mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d);
-
-void
-mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
- mp_limb_t d1, mp_limb_t d0);
-
-void
-mpn_div_qr_invert (struct gmp_div_inverse *inv,
- mp_srcptr dp, mp_size_t dn);
-int
-mpz_cmp_ui (const mpz_t u, unsigned long v);
-int
-mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-mp_limb_t
-mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt);
-mp_limb_t
-mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt);
-int
-mpz_invert (mpz_t r, const mpz_t u, const mpz_t m);
-mp_limb_t
-mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv);
-mp_limb_t
-mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n);
-void
-mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv);
-mp_limb_t
-mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-void
-mpn_div_qr_pi1 (mp_ptr qp,
- mp_ptr np, mp_size_t nn, mp_limb_t n1,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv);
-void
-mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- const struct gmp_div_inverse *inv);
-void
-mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m);
-int
-mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_size_t
-mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b);
-mp_limb_t
-mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b);
-mp_limb_t
-mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn);
-mp_size_t
-mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b);
-void
-mpz_sub (mpz_t r, const mpz_t a, const mpz_t b);
-mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-mp_limb_t
-mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl);
-mp_limb_t
-mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn);
-void
-mpz_mul (mpz_t r, const mpz_t u, const mpz_t v);
-void
-mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n);
-void
-mpn_zero (mp_ptr rp, mp_size_t n);
-void
-mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits);
-int
-mpn_zero_p(mp_srcptr rp, mp_size_t n);
-void
-mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode);
-void
-mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt);
-int
-mpz_cmp (const mpz_t a, const mpz_t b);
-void
-mpz_add (mpz_t r, const mpz_t a, const mpz_t b);
-int
-mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index);
-mp_bitcnt_t
-mpn_limb_size_in_base_2 (mp_limb_t u);
-size_t
-mpz_sizeinbase (const mpz_t u, int base);
-int
-mpz_sgn (const mpz_t u);
-mp_bitcnt_t
-mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
- mp_limb_t ux);
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit);
-mp_bitcnt_t
-mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit);
-mp_bitcnt_t
-mpz_make_odd (mpz_t r);
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d);
-void
-mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_setbit (mpz_t d, mp_bitcnt_t bit_index);
-void
-mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d);
-int
-mpz_cmpabs (const mpz_t u, const mpz_t v);
-void
-mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v);
-void
-mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v);
-
-unsigned
-mpn_base_power_of_two_p (unsigned b);
-void
-mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b);
-int isspace_gpu(unsigned char c);
-int strlen_c(__global char *c);
-mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
- unsigned bits);
-mp_size_t
-mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
- mp_limb_t b, const struct mpn_base_info *info);
-int
-mpz_set_str (mpz_t r, __global char *sp, int base);
-int
-mpz_init_set_str (mpz_t r, __global char *sp, int base);
-
-//void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b);
-////void mpz_add (mpz_t, const mpz_t, const mpz_t);
-
-void mpz_abs (mpz_t, const mpz_t);
-
-void mpz_neg (mpz_t, const mpz_t);
-void mpz_swap (mpz_t, mpz_t);
-//void mpz_mod (mpz_t, const mpz_t, const mpz_t);
-//
-////int mpz_sgn (const mpz_t);
-//
-////void mpz_mul (mpz_t, const mpz_t, const mpz_t);
-//void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
-//
-//void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
-////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
-//
-void mpz_addmul (mpz_t, const mpz_t, const mpz_t);
-//
-//int mpz_tstbit (const mpz_t, mp_bitcnt_t);
-//
-//int mpz_cmp_ui (const mpz_t u, unsigned long v);
-//
-void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn);
-//
-//mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
-
-void
-mpz_set_lg (unsigned long *r, __global unsigned long *x);
-
-#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
-
-#define MPZ_REALLOC(z,n) (z)->_mp_d
-
-void
-mpz_init (mpz_t r)
-{
- const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;
-
- r->_mp_alloc = 0;
- r->_mp_size = 0;
-
- //memset(r->_mp_d, 0, 256);
-
- for (int i = 0; i < 256; i++) {
- r->_mp_d[i] = 0;
- }
-
- // r->_mp_d = (mp_ptr) &dummy_limb;
-}
-
-void
-mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- mp_size_t i;
- for (i = 0; i < n; i++)
- d[i] = s[i];
-}
-
-void
-mpz_set (mpz_t r, const mpz_t x)
-{
- /* Allow the NOP r == x */
- if (r != x)
- {
- mp_size_t n;
- mp_ptr rp;
-
- n = GMP_ABS (x->_mp_size);
- rp = MPZ_REALLOC (r, n);
-
- mpn_copyi (rp, x->_mp_d, n);
- r->_mp_size = x->_mp_size;
- }
-}
-
-void
-mpz_set_lg (unsigned long *r, __global unsigned long *x)
-{
-
-
-
-
- // event_t wait;
-
- //wait = async_work_group_strided_copy(r,x, 256 + 2, 4,0);
-
-
-
- //wait_group_events(0,&wait);
-
- r[0] = x[0];
- r[1] = x[1];
-
- for (int i = 2; i < 256; i++) {
-
- r[i] = x[i];
-
- }
-
- //printf((__constant char *)"%i\n",r->_mp_size);
-
- // memcpy(r->_mp_d,(*(mpz_t *)x)->_mp_d,256);
-
-}
-
-
-void
-mpz_set_ui (mpz_t r, unsigned long int x)
-{
- if (x > 0)
- {
- r->_mp_size = 1;
- MPZ_REALLOC (r, 1)[0] = x;
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- while (x >>= LOCAL_GMP_LIMB_BITS)
- {
- ++ r->_mp_size;
- MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
- }
- }
- }
- else
- r->_mp_size = 0;
-}
-
-
-void
-mpz_neg (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = -r->_mp_size;
-}
-
-
-void
-mpz_set_si (mpz_t r, signed long int x)
-{
- if (x >= 0)
- mpz_set_ui (r, x);
- else /* (x < 0) */
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
- mpz_neg (r, r);
- }
- else
- {
- r->_mp_size = -1;
- MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
- }
-}
-
-void
-mpz_init_set_si (mpz_t r, signed long int x)
-{
- mpz_init (r);
- mpz_set_si (r, x);
-}
-
-
-void
-mpz_init_set (mpz_t r, const mpz_t x)
-{
- mpz_init (r);
- mpz_set (r, x);
-}
-
-void
-mpz_init2 (mpz_t r, mp_bitcnt_t bits)
-{
- mp_size_t rn;
-
- bits -= (bits != 0); /* Round down, except if 0 */
- rn = 1 + bits / GMP_LIMB_BITS;
-
- r->_mp_alloc = rn;
- r->_mp_size = 0;
- // r->_mp_d = gmp_alloc_limbs (rn);
-}
-
-void
-mpz_init_set_ui (mpz_t r, unsigned long int x)
-{
- mpz_init (r);
- mpz_set_ui (r, x);
-}
-
-void
-mpz_clear (mpz_t r)
-{
- //if (r->_mp_alloc)
- //gmp_free_limbs (r->_mp_d, r->_mp_alloc);
-}
-
-
-void
-gmp_die (const char *msg)
-{
- //fprintf (stderr, "%s\n", msg);
- //abort();
-}
-
-mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n)
-{
- while (n > 0 && xp[n-1] == 0)
- --n;
- return n;
-}
-
-void
-mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_t bb;
- mpz_init_set_ui (bb, b);
- mpz_add (r, a, bb);
- mpz_clear (bb);
-}
-
-void
-mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
-{
- mpz_neg (r, b);
- mpz_add_ui (r, r, a);
-}
-
-
-void
-mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
-{
- mpz_ui_sub (r, b, a);
- mpz_neg (r, r);
-}
-
-int
-mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
-{
- int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
- mp_limb_t ulongrem = 0;
-
- if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
- ulongrem = (mp_limb_t) (ULONG_MAX_gpu >> GMP_LIMB_BITS * ulongsize) + 1;
-
- return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
-}
-
-unsigned long int
-mpz_get_ui (const mpz_t u)
-{
- if (GMP_LIMB_BITS < GMP_ULONG_BITS)
- {
- int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
- unsigned long r = 0;
- mp_size_t n = GMP_ABS (u->_mp_size);
- n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
- while (--n >= 0)
- r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
- return r;
- }
-
- return u->_mp_size == 0 ? 0 : u->_mp_d[0];
-}
-
-int
-mpz_cmpabs_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t un = GMP_ABS (u->_mp_size);
-
- if (! mpn_absfits_ulong_p (u->_mp_d, un))
- return 1;
- else
- {
- unsigned long uu = mpz_get_ui (u);
- return GMP_CMP(uu, v);
- }
-}
-
-mp_limb_t
-mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
-
- i = 0;
- do
- {
- mp_limb_t a = ap[i];
- /* Carry out */
- mp_limb_t cy = a < b;
- rp[i] = a - b;
- b = cy;
- }
- while (++i < n);
-
- return b;
-}
-
-mp_limb_t
-mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b;
- a = ap[i]; b = bp[i];
- b += cy;
- cy = (b < cy);
- cy += (a < b);
- rp[i] = a - b;
- }
- return cy;
-}
-
-mp_limb_t
-mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_sub_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-
-mp_limb_t
-mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
-{
- mp_limb_t r, m;
-
- {
- mp_limb_t p, ql;
- unsigned ul, uh, qh;
-
- assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t));
- /* For notation, let b denote the half-limb base, so that B = b^2.
- Split u1 = b uh + ul. */
- ul = u1 & GMP_LLIMB_MASK;
- uh = u1 >> (GMP_LIMB_BITS / 2);
-
- /* Approximation of the high half of quotient. Differs from the 2/1
- inverse of the half limb uh, since we have already subtracted
- u0. */
- qh = (u1 ^ GMP_LIMB_MAX) / uh;
-
- /* Adjust to get a half-limb 3/2 inverse, i.e., we want
-
- qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
- = floor( (b (~u) + b-1) / u),
-
- and the remainder
-
- r = b (~u) + b-1 - qh (b uh + ul)
- = b (~u - qh uh) + b-1 - qh ul
-
- Subtraction of qh ul may underflow, which implies adjustments.
- But by normalization, 2 u >= B > qh ul, so we need to adjust by
- at most 2.
- */
-
- r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
-
- p = (mp_limb_t) qh * ul;
- /* Adjustment steps taken from udiv_qrnnd_c */
- if (r < p)
- {
- qh--;
- r += u1;
- if (r >= u1) /* i.e. we didn't get carry when adding to r */
- if (r < p)
- {
- qh--;
- r += u1;
- }
- }
- r -= p;
-
- /* Low half of the quotient is
-
- ql = floor ( (b r + b-1) / u1).
-
- This is a 3/2 division (on half-limbs), for which qh is a
- suitable inverse. */
-
- p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
- /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
- work, it is essential that ql is a full mp_limb_t. */
- ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
-
- /* By the 3/2 trick, we don't need the high half limb. */
- r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
-
- if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
- {
- ql--;
- r += u1;
- }
- m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
- if (r >= u1)
- {
- m++;
- r -= u1;
- }
- }
-
- /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
- 3/2 inverse. */
- if (u0 > 0)
- {
- mp_limb_t th, tl;
- r = ~r;
- r += u0;
- if (r < u0)
- {
- m--;
- if (r >= u1)
- {
- m--;
- r -= u1;
- }
- r -= u1;
- }
- gmp_umul_ppmm (th, tl, u0, m);
- r += th;
- if (r < th)
- {
- m--;
- m -= ((r > u1) | ((r == u1) & (tl > u0)));
- }
- }
-
- return m;
-}
-
-int
-mpz_div_qr (mpz_t q, mpz_t r,
- const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
-{
- mp_size_t ns, ds, nn, dn, qs;
- ns = n->_mp_size;
- ds = d->_mp_size;
-
- if (ds == 0) {
-
- }
- //gmp_die("mpz_div_qr: Divide by zero.");
-
- if (ns == 0)
- {
- if (q)
- q->_mp_size = 0;
- if (r)
- r->_mp_size = 0;
- return 0;
- }
-
- nn = GMP_ABS (ns);
- dn = GMP_ABS (ds);
-
- qs = ds ^ ns;
-
- if (nn < dn)
- {
- if (mode == GMP_DIV_CEIL && qs >= 0)
- {
- /* q = 1, r = n - d */
- if (r)
- mpz_sub (r, n, d);
- if (q)
- mpz_set_ui (q, 1);
- }
- else if (mode == GMP_DIV_FLOOR && qs < 0)
- {
- /* q = -1, r = n + d */
- if (r)
- mpz_add (r, n, d);
- if (q)
- mpz_set_si (q, -1);
- }
- else
- {
- /* q = 0, r = d */
- if (r)
- mpz_set (r, n);
- if (q)
- q->_mp_size = 0;
- }
- return 1;
- }
- else
- {
- mp_ptr np, qp;
- mp_size_t qn, rn;
- mpz_t tq, tr;
-
- mpz_init_set (tr, n);
- np = tr->_mp_d;
-
- qn = nn - dn + 1;
-
- if (q)
- {
- mpz_init2 (tq, qn * GMP_LIMB_BITS);
- qp = tq->_mp_d;
- }
- else
- qp = NULL;
-
- mpn_div_qr (qp, np, nn, d->_mp_d, dn);
-
- if (qp)
- {
- qn -= (qp[qn-1] == 0);
-
- tq->_mp_size = qs < 0 ? -qn : qn;
- }
- rn = mpn_normalized_size (np, dn);
- tr->_mp_size = ns < 0 ? - rn : rn;
-
- if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
- {
- if (q)
- mpz_sub_ui (tq, tq, 1);
- if (r)
- mpz_add (tr, tr, d);
- }
- else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
- {
- if (q)
- mpz_add_ui (tq, tq, 1);
- if (r)
- mpz_sub (tr, tr, d);
- }
-
- if (q)
- {
- mpz_swap (tq, q);
- mpz_clear (tq);
- }
- if (r)
- mpz_swap (tr, r);
-
- mpz_clear (tr);
-
- return rn != 0;
- }
-}
-
-void
-mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
-{
- struct gmp_div_inverse inv;
- // mp_ptr tp = NULL;
-
- mpz_t tp;
-
-
-
- assert (dn > 0);
- assert (nn >= dn);
-
- mpn_div_qr_invert (&inv, dp, dn);
- if (dn > 2 && inv.shift > 0)
- {
- //tp = gmp_alloc_limbs (dn);
- gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift));
- dp = tp->_mp_d;
- }
- mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
- if (tp) {}
- //gmp_free_limbs (tp, dn);
-}
-
-void
-mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- mpz_t t;
- mpz_init (t);
- mpz_mul (t, u, v);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-void
-mpz_swap (mpz_t u, mpz_t v)
-{
- //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
- //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size);
-
- mpz_t temp;
- mpz_init(temp);
-
- *temp = *u;
- *u = *v;
- *v = *temp;
-
-}
-
-void
-mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
-}
-
-void
-mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
-{
- unsigned shift;
-
- assert (d > 0);
- gmp_clz (shift, d);
- inv->shift = shift;
- inv->d1 = d << shift;
- inv->di = mpn_invert_limb (inv->d1);
-}
-
-void
-mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
- mp_limb_t d1, mp_limb_t d0)
-{
- unsigned shift;
-
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 <<= shift;
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
-}
-
-void
-mpn_div_qr_invert (struct gmp_div_inverse *inv,
- mp_srcptr dp, mp_size_t dn)
-{
- assert (dn > 0);
-
- if (dn == 1)
- mpn_div_qr_1_invert (inv, dp[0]);
- else if (dn == 2)
- mpn_div_qr_2_invert (inv, dp[1], dp[0]);
- else
- {
- unsigned shift;
- mp_limb_t d1, d0;
-
- d1 = dp[dn-1];
- d0 = dp[dn-2];
- assert (d1 > 0);
- gmp_clz (shift, d1);
- inv->shift = shift;
- if (shift > 0)
- {
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
- }
- inv->d1 = d1;
- inv->d0 = d0;
- inv->di = mpn_invert_3by2 (d1, d0);
- }
-}
-
-
-int
-mpz_cmp_ui (const mpz_t u, unsigned long v)
-{
- mp_size_t usize = u->_mp_size;
-
- if (usize < 0)
- return -1;
- else
- return mpz_cmpabs_ui (u, v);
-}
-
-int
-mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- while (--n >= 0)
- {
- if (ap[n] != bp[n])
- return ap[n] > bp[n] ? 1 : -1;
- }
- return 0;
-}
-
-mp_limb_t
-mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- up += n;
- rp += n;
-
- tnc = GMP_LIMB_BITS - cnt;
- low_limb = *--up;
- retval = low_limb >> tnc;
- high_limb = (low_limb << cnt);
-
- while (--n != 0)
- {
- low_limb = *--up;
- *--rp = high_limb | (low_limb >> tnc);
- high_limb = (low_limb << cnt);
- }
- *--rp = high_limb;
-
- return retval;
-}
-
-mp_limb_t
-mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_limb_t retval;
-
- assert (n >= 1);
- assert (cnt >= 1);
- assert (cnt < GMP_LIMB_BITS);
-
- tnc = GMP_LIMB_BITS - cnt;
- high_limb = *up++;
- retval = (high_limb << tnc);
- low_limb = high_limb >> cnt;
-
- while (--n != 0)
- {
- high_limb = *up++;
- *rp++ = low_limb | (high_limb << tnc);
- low_limb = high_limb >> cnt;
- }
- *rp = low_limb;
-
- return retval;
-}
-
-int
-mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
-{
- mpz_t g, tr;
- int invertible;
-
- if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
- return 0;
-
- mpz_init (g);
- mpz_init (tr);
-
- mpz_gcdext (g, tr, NULL, u, m);
- invertible = (mpz_cmp_ui (g, 1) == 0);
-
- if (invertible)
- {
- if (tr->_mp_size < 0)
- {
- if (m->_mp_size >= 0)
- mpz_add (tr, tr, m);
- else
- mpz_sub (tr, tr, m);
- }
- mpz_swap (r, tr);
- }
-
- mpz_clear (g);
- mpz_clear (tr);
- return invertible;
-}
-
-/* Not matching current public gmp interface, rather corresponding to
- the sbpi1_div_* functions. */
-mp_limb_t
-mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- mp_limb_t d, di;
- mp_limb_t r;
- mp_ptr tp = NULL;
- mp_size_t tn = 0;
-
- if (inv->shift > 0)
- {
- /* Shift, reusing qp area if possible. In-place shift if qp == np. */
- tp = qp;
- if (!tp)
- {
- tn = nn;
-
- // tp = gmp_alloc_limbs (tn);
- }
- r = mpn_lshift (tp, np, nn, inv->shift);
- np = tp;
- }
- else
- r = 0;
-
- d = inv->d1;
- di = inv->di;
- while (--nn >= 0)
- {
- mp_limb_t q;
-
- gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
- if (qp)
- qp[nn] = q;
- }
- //if (tn)
- //gmp_free_limbs (tp, tn);
-
- return r >> inv->shift;
-}
-
-mp_limb_t
-mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_size_t i;
- mp_limb_t cy;
-
- for (i = 0, cy = 0; i < n; i++)
- {
- mp_limb_t a, b, r;
- a = ap[i]; b = bp[i];
- r = a + cy;
- cy = (r < cy);
- r += b;
- cy += (r < b);
- rp[i] = r;
- }
- return cy;
-}
-
-void
-mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- const struct gmp_div_inverse *inv)
-{
- unsigned shift;
- mp_size_t i;
- mp_limb_t d1, d0, di, r1, r0;
-
- assert (nn >= 2);
- shift = inv->shift;
- d1 = inv->d1;
- d0 = inv->d0;
- di = inv->di;
-
- if (shift > 0)
- r1 = mpn_lshift (np, np, nn, shift);
- else
- r1 = 0;
-
- r0 = np[nn - 1];
-
- i = nn - 2;
- do
- {
- mp_limb_t n0, q;
- n0 = np[i];
- gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- if (shift > 0)
- {
- assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
- r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
- r1 >>= shift;
- }
-
- np[1] = r1;
- np[0] = r0;
-}
-
-mp_limb_t
-mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl - lpl;
- cl += lpl > rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-void
-mpn_div_qr_pi1 (mp_ptr qp,
- mp_ptr np, mp_size_t nn, mp_limb_t n1,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_size_t i;
-
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
-
- assert (dn > 2);
- assert (nn >= dn);
-
- d1 = dp[dn - 1];
- d0 = dp[dn - 2];
-
- assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
- /* Iteration variable is the index of the q limb.
- *
- * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
- * by <d1, d0, dp[dn-3], ..., dp[0] >
- */
-
- i = nn - dn;
- do
- {
- mp_limb_t n0 = np[dn-1+i];
-
- if (n1 == d1 && n0 == d0)
- {
- q = GMP_LIMB_MAX;
- mpn_submul_1 (np+i, dp, dn, q);
- n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
-
- cy = mpn_submul_1 (np + i, dp, dn-2, q);
-
- cy1 = n0 < cy;
- n0 = n0 - cy;
- cy = n1 < cy1;
- n1 = n1 - cy1;
- np[dn-2+i] = n0;
-
- if (cy != 0)
- {
- n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
- q--;
- }
- }
-
- if (qp)
- qp[i] = q;
- }
- while (--i >= 0);
-
- np[dn - 1] = n1;
-}
-
-void
-mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- const struct gmp_div_inverse *inv)
-{
- assert (dn > 0);
- assert (nn >= dn);
-
- if (dn == 1)
- np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
- else if (dn == 2)
- mpn_div_qr_2_preinv (qp, np, nn, inv);
- else
- {
- mp_limb_t nh;
- unsigned shift;
-
- assert (inv->d1 == dp[dn-1]);
- assert (inv->d0 == dp[dn-2]);
- assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
-
- shift = inv->shift;
- if (shift > 0)
- nh = mpn_lshift (np, np, nn, shift);
- else
- nh = 0;
-
- mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);
-
- if (shift > 0)
- gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
- }
-}
-
-void
-mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
-{
- mpz_t tr;
- mpz_t base;
- mp_size_t en, mn;
- mp_srcptr mp;
- struct gmp_div_inverse minv;
- unsigned shift;
- //mp_ptr tp = NULL;
- mpz_t tp;
-
- //mpz_init(tp);
-
- en = GMP_ABS (e->_mp_size);
- mn = GMP_ABS (m->_mp_size);
- if (mn == 0) {}
- //gmp_die ("mpz_powm: Zero modulo.");
-
- if (en == 0)
- {
- mpz_set_ui (r, mpz_cmpabs_ui (m, 1));
- return;
- }
-
- mp = m->_mp_d;
- mpn_div_qr_invert (&minv, mp, mn);
- shift = minv.shift;
-
- if (shift > 0)
- {
- /* To avoid shifts, we do all our reductions, except the final
- one, using a *normalized* m. */
- minv.shift = 0;
-
- // tp = gmp_alloc_limbs (mn);
- gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift));
- mp = tp->_mp_d;
- }
-
- mpz_init (base);
-
- if (e->_mp_size < 0)
- {
- if (!mpz_invert (base, b, m)) {}
- //gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
- }
- else
- {
- mp_size_t bn;
- mpz_abs (base, b);
-
- bn = base->_mp_size;
- if (bn >= mn)
- {
- mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
- bn = mn;
- }
-
- /* We have reduced the absolute value. Now take care of the
- sign. Note that we get zero represented non-canonically as
- m. */
- if (b->_mp_size < 0)
- {
- mp_ptr bp = MPZ_REALLOC (base, mn);
- gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
- bn = mn;
- }
- base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
- }
- mpz_init_set_ui (tr, 1);
-
- while (--en >= 0)
- {
- mp_limb_t w = e->_mp_d[en];
- mp_limb_t bit;
-
- bit = GMP_LIMB_HIGHBIT;
- do
- {
- mpz_mul (tr, tr, tr);
- if (w & bit)
- mpz_mul (tr, tr, base);
- if (tr->_mp_size > mn)
- {
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- bit >>= 1;
- }
- while (bit > 0);
- }
-
- /* Final reduction */
- if (tr->_mp_size >= mn)
- {
- minv.shift = shift;
- mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
- tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
- }
- //if (tp)
- //gmp_free_limbs (tp, mn);
-
- mpz_swap (r, tr);
- mpz_clear (tr);
- mpz_clear (base);
-}
-
-int
-mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- if (an != bn)
- return an < bn ? -1 : 1;
- else
- return mpn_cmp (ap, bp, an);
-}
-
-
-mp_size_t
-mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- int cmp;
- mp_ptr rp;
-
- cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
- if (cmp > 0)
- {
- rp = MPZ_REALLOC (r, an);
- gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
- return mpn_normalized_size (rp, an);
- }
- else if (cmp < 0)
- {
- rp = MPZ_REALLOC (r, bn);
- gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
- return -mpn_normalized_size (rp, bn);
- }
- else
- return 0;
-}
-
-mp_limb_t
-mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_size_t i;
-
- assert (n > 0);
- i = 0;
- do
- {
- mp_limb_t r = ap[i] + b;
- /* Carry out */
- b = (r < b);
- rp[i] = r;
- }
- while (++i < n);
-
- return b;
-}
-
-
-mp_limb_t
-mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_limb_t cy;
-
- assert (an >= bn);
-
- cy = mpn_add_n (rp, ap, bp, bn);
- if (an > bn)
- cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
- return cy;
-}
-
-mp_size_t
-mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t an = GMP_ABS (a->_mp_size);
- mp_size_t bn = GMP_ABS (b->_mp_size);
- mp_ptr rp;
- mp_limb_t cy;
-
- if (an < bn)
- {
- MPZ_SRCPTR_SWAP (a, b);
- MP_SIZE_T_SWAP (an, bn);
- }
-
- rp = MPZ_REALLOC (r, an + 1);
- cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
-
- rp[an] = cy;
-
- return an + cy;
-}
-
-void
-mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_sub (r, a, b);
- else
- rn = mpz_abs_add (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-mp_limb_t
-mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl, rl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- rl = *rp;
- lpl = rl + lpl;
- cl += lpl < rl;
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-mp_limb_t
-mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
-{
- mp_limb_t ul, cl, hpl, lpl;
-
- assert (n >= 1);
-
- cl = 0;
- do
- {
- ul = *up++;
- gmp_umul_ppmm (hpl, lpl, ul, vl);
-
- lpl += cl;
- cl = (lpl < cl) + hpl;
-
- *rp++ = lpl;
- }
- while (--n != 0);
-
- return cl;
-}
-
-
-mp_limb_t
-mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
-{
- assert (un >= vn);
- assert (vn >= 1);
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
- assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));
-
- /* We first multiply by the low order limb. This result can be
- stored, not added, to rp. We also avoid a loop for zeroing this
- way. */
-
- rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
-
- /* Now accumulate the product of up[] and the next higher limb from
- vp[]. */
-
- while (--vn >= 1)
- {
- rp += 1, vp += 1;
- rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
- }
- return rp[un];
-}
-
-
-void
-mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
-{
- int sign;
- mp_size_t un, vn, rn;
- mpz_t t;
- mp_ptr tp;
-
- un = u->_mp_size;
- vn = v->_mp_size;
-
- if (un == 0 || vn == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- sign = (un ^ vn) < 0;
-
- un = GMP_ABS (un);
- vn = GMP_ABS (vn);
-
- mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
-
- tp = t->_mp_d;
- if (un >= vn)
- mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
- else
- mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
-
- rn = un + vn;
- rn -= tp[rn-1] == 0;
-
- t->_mp_size = sign ? - rn : rn;
- mpz_swap (r, t);
- mpz_clear (t);
-}
-
-void
-mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
-{
- while (--n >= 0)
- d[n] = s[n];
-}
-
-void
-mpn_zero (mp_ptr rp, mp_size_t n)
-{
- while (--n >= 0)
- rp[n] = 0;
-}
-
-
-void
-mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
-{
- mp_size_t un, rn;
- mp_size_t limbs;
- unsigned shift;
- mp_ptr rp;
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- {
- r->_mp_size = 0;
- return;
- }
-
- limbs = bits / GMP_LIMB_BITS;
- shift = bits % GMP_LIMB_BITS;
-
- rn = un + limbs + (shift > 0);
- rp = MPZ_REALLOC (r, rn);
- if (shift > 0)
- {
- mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
- rp[rn-1] = cy;
- rn -= (cy == 0);
- }
- else
- mpn_copyd (rp + limbs, u->_mp_d, un);
-
- mpn_zero (rp, limbs);
-
- r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
-}
-
-int
-mpn_zero_p(mp_srcptr rp, mp_size_t n)
-{
- return mpn_normalized_size (rp, n) == 0;
-}
-
-
-void
-mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
- enum mpz_div_round_mode mode)
-{
- mp_size_t un, qn;
- mp_size_t limb_cnt;
- mp_ptr qp;
- int adjust;
-
- un = u->_mp_size;
- if (un == 0)
- {
- q->_mp_size = 0;
- return;
- }
- limb_cnt = bit_index / GMP_LIMB_BITS;
- qn = GMP_ABS (un) - limb_cnt;
- bit_index %= GMP_LIMB_BITS;
-
- if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
- /* Note: Below, the final indexing at limb_cnt is valid because at
- that point we have qn > 0. */
- adjust = (qn <= 0
- || !mpn_zero_p (u->_mp_d, limb_cnt)
- || (u->_mp_d[limb_cnt]
- & (((mp_limb_t) 1 << bit_index) - 1)));
- else
- adjust = 0;
-
- if (qn <= 0)
- qn = 0;
- else
- {
- qp = MPZ_REALLOC (q, qn);
-
- if (bit_index != 0)
- {
- mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
- qn -= qp[qn - 1] == 0;
- }
- else
- {
- mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
- }
- }
-
- q->_mp_size = qn;
-
- if (adjust)
- mpz_add_ui (q, q, 1);
- if (un < 0)
- mpz_neg (q, q);
-}
-
-void
-mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
-{
- mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
-}
-
-int
-mpz_cmp (const mpz_t a, const mpz_t b)
-{
- mp_size_t asize = a->_mp_size;
- mp_size_t bsize = b->_mp_size;
-
- if (asize != bsize)
- return (asize < bsize) ? -1 : 1;
- else if (asize >= 0)
- return mpn_cmp (a->_mp_d, b->_mp_d, asize);
- else
- return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
-}
-
-void
-mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
-{
- mp_size_t rn;
-
- if ( (a->_mp_size ^ b->_mp_size) >= 0)
- rn = mpz_abs_add (r, a, b);
- else
- rn = mpz_abs_sub (r, a, b);
-
- r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
-}
-
-
-int
-mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t limb_index;
- unsigned shift;
- mp_size_t ds;
- mp_size_t dn;
- mp_limb_t w;
- int bit;
-
- ds = d->_mp_size;
- dn = GMP_ABS (ds);
- limb_index = bit_index / GMP_LIMB_BITS;
- if (limb_index >= dn)
- return ds < 0;
-
- shift = bit_index % GMP_LIMB_BITS;
- w = d->_mp_d[limb_index];
- bit = (w >> shift) & 1;
-
- if (ds < 0)
- {
- /* d < 0. Check if any of the bits below is set: If so, our bit
- must be complemented. */
- if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
- return bit ^ 1;
- while (--limb_index >= 0)
- if (d->_mp_d[limb_index] > 0)
- return bit ^ 1;
- }
- return bit;
-}
-
-mp_bitcnt_t
-mpn_limb_size_in_base_2 (mp_limb_t u)
-{
- unsigned shift;
-
- assert (u > 0);
- gmp_clz (shift, u);
- return GMP_LIMB_BITS - shift;
-}
-
-size_t
-mpz_sizeinbase (const mpz_t u, int base)
-{
- mp_size_t un, tn;
- mp_srcptr up;
- //mp_ptr tp;
- mpz_t tp;
-
- mp_bitcnt_t bits;
- struct gmp_div_inverse bi;
- size_t ndigits;
-
- mpz_init(tp);
-
- assert (base >= 2);
-assert (base <= 62);
-
- un = GMP_ABS (u->_mp_size);
- if (un == 0)
- return 1;
-
- up = u->_mp_d;
-
- bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
- switch (base)
- {
- case 2:
- return bits;
- case 4:
- return (bits + 1) / 2;
- case 8:
- return (bits + 2) / 3;
- case 16:
- return (bits + 3) / 4;
- case 32:
- return (bits + 4) / 5;
- /* FIXME: Do something more clever for the common case of base
- 10. */
- }
-
- //tp = gmp_alloc_limbs (un);
-
- mpn_copyi (tp->_mp_d, up, un);
- mpn_div_qr_1_invert (&bi, base);
-
- tn = un;
- ndigits = 0;
- do
- {
- ndigits++;
- mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi);
- tn -= (tp->_mp_d[tn-1] == 0);
- }
- while (tn > 0);
-
- //gmp_free_limbs (tp, un);
- return ndigits;
-}
-
-int
-mpz_sgn (const mpz_t u)
-{
- return GMP_CMP (u->_mp_size, 0);
-}
-
-mp_bitcnt_t
-mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
- mp_limb_t ux)
-{
- unsigned cnt;
-
- assert (ux == 0 || ux == GMP_LIMB_MAX);
- assert (0 <= i && i <= un );
-
- while (limb == 0)
- {
- i++;
- if (i == un)
- return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
- limb = ux ^ up[i];
- }
- gmp_ctz (cnt, limb);
- return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
-}
-
-void
-mpz_abs (mpz_t r, const mpz_t u)
-{
- mpz_set (r, u);
- r->_mp_size = GMP_ABS (r->_mp_size);
-}
-
-
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
-{
- mp_size_t i;
- i = bit / GMP_LIMB_BITS;
-
- return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
- i, ptr, i, 0);
-}
-
-mp_bitcnt_t
-mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit)
-{
- mp_ptr up;
- mp_size_t us, un, i;
- mp_limb_t limb, ux;
-
- us = u->_mp_size;
- un = GMP_ABS (us);
- i = starting_bit / GMP_LIMB_BITS;
-
- /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
- for u<0. Notice this test picks up any u==0 too. */
- if (i >= un)
- return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
-
- up = u->_mp_d;
- ux = 0;
- limb = up[i];
-
- if (starting_bit != 0)
- {
- if (us < 0)
- {
- ux = mpn_zero_p (up, i);
- limb = ~ limb + ux;
- ux = - (mp_limb_t) (limb >= ux);
- }
-
- /* Mask to 0 all bits before starting_bit, thus ignoring them. */
- limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
- }
-
- return mpn_common_scan (limb, i, up, un, ux);
-}
-
-
-mp_bitcnt_t
-mpz_make_odd (mpz_t r)
-{
- mp_bitcnt_t shift;
-
- assert (r->_mp_size > 0);
- /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
- shift = mpn_scan1 (r->_mp_d, 0);
- mpz_tdiv_q_2exp (r, r, shift);
-
- return shift;
-}
-
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
-{
- mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
-}
-
-void
-mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_limb_t bit;
- mp_ptr dp;
-
- dn = GMP_ABS (d->_mp_size);
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- if (limb_index >= dn)
- {
- mp_size_t i;
- /* The bit should be set outside of the end of the number.
- We have to increase the size of the number. */
- dp = MPZ_REALLOC (d, limb_index + 1);
-
- dp[limb_index] = bit;
- for (i = dn; i < limb_index; i++)
- dp[i] = 0;
- dn = limb_index + 1;
- }
- else
- {
- mp_limb_t cy;
-
- dp = d->_mp_d;
-
- cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
- if (cy > 0)
- {
- dp = MPZ_REALLOC (d, dn + 1);
- dp[dn++] = cy;
- }
- }
-
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-void
-mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
-{
- mp_size_t dn, limb_index;
- mp_ptr dp;
- mp_limb_t bit;
-
- dn = GMP_ABS (d->_mp_size);
- dp = d->_mp_d;
-
- limb_index = bit_index / GMP_LIMB_BITS;
- bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
-
- assert (limb_index < dn);
-
- gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
- dn - limb_index, bit));
- dn = mpn_normalized_size (dp, dn);
- d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
-}
-
-void
-mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
-{
- if (!mpz_tstbit (d, bit_index))
- {
- if (d->_mp_size >= 0)
- mpz_abs_add_bit (d, bit_index);
- else
- mpz_abs_sub_bit (d, bit_index);
- }
-}
-
-void
-mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
-{
- gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
-}
-
-#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
-#define mpz_even_p(z) (! mpz_odd_p (z))
-
-int
-mpz_cmpabs (const mpz_t u, const mpz_t v)
-{
- return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
- v->_mp_d, GMP_ABS (v->_mp_size));
-}
-
-void
-mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
-{
- mpz_t tu, tv, s0, s1, t0, t1;
- mp_bitcnt_t uz, vz, gz;
- mp_bitcnt_t power;
-
- if (u->_mp_size == 0)
- {
- /* g = 0 u + sgn(v) v */
- signed long sign = mpz_sgn (v);
- mpz_abs (g, v);
- if (s)
- s->_mp_size = 0;
- if (t)
- mpz_set_si (t, sign);
- return;
- }
-
- if (v->_mp_size == 0)
- {
- /* g = sgn(u) u + 0 v */
- signed long sign = mpz_sgn (u);
- mpz_abs (g, u);
- if (s)
- mpz_set_si (s, sign);
- if (t)
- t->_mp_size = 0;
- return;
- }
-
- mpz_init (tu);
- mpz_init (tv);
- mpz_init (s0);
- mpz_init (s1);
- mpz_init (t0);
- mpz_init (t1);
-
- mpz_abs (tu, u);
- uz = mpz_make_odd (tu);
- mpz_abs (tv, v);
- vz = mpz_make_odd (tv);
-gz = GMP_MIN (uz, vz);
-
- uz -= gz;
- vz -= gz;
-
- /* Cofactors corresponding to odd gcd. gz handled later. */
- if (tu->_mp_size < tv->_mp_size)
- {
- mpz_swap (tu, tv);
- MPZ_SRCPTR_SWAP (u, v);
- MPZ_PTR_SWAP (s, t);
- MP_BITCNT_T_SWAP (uz, vz);
- }
-
- /* Maintain
- *
- * u = t0 tu + t1 tv
- * v = s0 tu + s1 tv
- *
- * where u and v denote the inputs with common factors of two
- * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
- *
- * 2^p tu = s1 u - t1 v
- * 2^p tv = -s0 u + t0 v
- */
-
- /* After initial division, tu = q tv + tu', we have
- *
- * u = 2^uz (tu' + q tv)
- * v = 2^vz tv
- *
- * or
- *
- * t0 = 2^uz, t1 = 2^uz q
- * s0 = 0, s1 = 2^vz
- */
-
- mpz_tdiv_qr (t1, tu, tu, tv);
- mpz_mul_2exp (t1, t1, uz);
-
- mpz_setbit (s1, vz);
- power = uz + vz;
-
- if (tu->_mp_size > 0)
- {
- mp_bitcnt_t shift;
- shift = mpz_make_odd (tu);
- mpz_setbit (t0, uz + shift);
- power += shift;
-
- for (;;)
- {
- int c;
- c = mpz_cmp (tu, tv);
- if (c == 0)
- break;
-
- if (c < 0)
- {
- /* tv = tv' + tu
- *
- * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
- * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
-
- mpz_sub (tv, tv, tu);
- mpz_add (t0, t0, t1);
- mpz_add (s0, s0, s1);
-
- shift = mpz_make_odd (tv);
- mpz_mul_2exp (t1, t1, shift);
- mpz_mul_2exp (s1, s1, shift);
- }
- else
- {
- mpz_sub (tu, tu, tv);
- mpz_add (t1, t0, t1);
- mpz_add (s1, s0, s1);
-
- shift = mpz_make_odd (tu);
- mpz_mul_2exp (t0, t0, shift);
- mpz_mul_2exp (s0, s0, shift);
- }
- power += shift;
- }
- }
- else
- mpz_setbit (t0, uz);
-
- /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
- cofactors. */
-
- mpz_mul_2exp (tv, tv, gz);
- mpz_neg (s0, s0);
-
- /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
- adjust cofactors, we need u / g and v / g */
-
- mpz_divexact (s1, v, tv);
- mpz_abs (s1, s1);
- mpz_divexact (t1, u, tv);
- mpz_abs (t1, t1);
-
- while (power-- > 0)
- {
- /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
- if (mpz_odd_p (s0) || mpz_odd_p (t0))
- {
- mpz_sub (s0, s0, s1);
- mpz_add (t0, t0, t1);
- }
- //assert (mpz_even_p (t0) && mpz_even_p (s0));
- mpz_tdiv_q_2exp (s0, s0, 1);
- mpz_tdiv_q_2exp (t0, t0, 1);
- }
-
- /* Arrange so that |s| < |u| / 2g */
- mpz_add (s1, s0, s1);
- if (mpz_cmpabs (s0, s1) > 0)
- {
- mpz_swap (s0, s1);
- mpz_sub (t0, t0, t1);
- }
- if (u->_mp_size < 0)
- mpz_neg (s0, s0);
- if (v->_mp_size < 0)
- mpz_neg (t0, t0);
-
- mpz_swap (g, tv);
- if (s)
- mpz_swap (s, s0);
- if (t)
- mpz_swap (t, t0);
-
- mpz_clear (tu);
- mpz_clear (tv);
- mpz_clear (s0);
- mpz_clear (s1);
- mpz_clear (t0);
- mpz_clear (t1);
-}
-
-
-void
-mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
-{
- mpz_t t;
- mpz_init_set_ui (t, v);
- mpz_mul (t, u, t);
- mpz_add (r, r, t);
- mpz_clear (t);
-}
-
-
-// STRING CONVERSION
-
-unsigned
-mpn_base_power_of_two_p (unsigned b)
-{
- switch (b)
- {
- case 2: return 1;
- case 4: return 2;
- case 8: return 3;
- case 16: return 4;
- case 32: return 5;
- case 64: return 6;
- case 128: return 7;
- case 256: return 8;
- default: return 0;
- }
-}
-
-
-
-void
-mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
-{
- mp_limb_t m;
- mp_limb_t p;
- unsigned exp;
-
- m = GMP_LIMB_MAX / b;
- for (exp = 1, p = b; p <= m; exp++)
- p *= b;
-
- info->exp = exp;
- info->bb = p;
-}
-
-int isspace_gpu(unsigned char c) {
- if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v')
- return 1;
- return 0;
-}
-
-int strlen_c(__global char *c) {
-
- // rather naive implementation – we assume a string is terminated, and is not 0 characters long.
-
- int i = 0;
- while (1) {
- if (c[i] == '\0')
- return i;
- i++;
- }
- return i;
-}
-
-mp_size_t
-mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
- unsigned bits)
-{
- mp_size_t rn;
- mp_limb_t limb;
- unsigned shift;
-
- for (limb = 0, rn = 0, shift = 0; sn-- > 0; )
- {
- limb |= (mp_limb_t) sp[sn] << shift;
- shift += bits;
- if (shift >= GMP_LIMB_BITS)
- {
- shift -= GMP_LIMB_BITS;
- rp[rn++] = limb;
- /* Next line is correct also if shift == 0,
- bits == 8, and mp_limb_t == unsigned char. */
- limb = (unsigned int) sp[sn] >> (bits - shift);
- }
- }
- if (limb != 0)
- rp[rn++] = limb;
- else
- rn = mpn_normalized_size (rp, rn);
- return rn;
-}
-
-mp_size_t
-mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
- mp_limb_t b, const struct mpn_base_info *info)
-{
- mp_size_t rn;
- mp_limb_t w;
- unsigned k;
- size_t j;
-
- assert (sn > 0);
-
- k = 1 + (sn - 1) % info->exp;
-
- j = 0;
- w = sp[j++];
- while (--k != 0)
- w = w * b + sp[j++];
-
- rp[0] = w;
-
- for (rn = 1; j < sn;)
- {
- mp_limb_t cy;
-
- w = sp[j++];
- for (k = 1; k < info->exp; k++)
- w = w * b + sp[j++];
-
- cy = mpn_mul_1 (rp, rp, rn, info->bb);
- cy += mpn_add_1 (rp, rp, rn, w);
- if (cy > 0)
- rp[rn++] = cy;
- }
- assert (j == sn);
-
- return rn;
-}
-
-
-int
-mpz_set_str (mpz_t r, __global char *sp, int base)
-{
- unsigned bits, value_of_a;
- mp_size_t rn, alloc;
- mp_ptr rp;
- size_t dn, sn;
- int sign;
- unsigned char dp[256];
-
- assert (base == 0 || (base >= 2 && base <= 62));
-
- while (isspace_gpu( (unsigned char) *sp))
- sp++;
-
- sign = (*sp == '-');
- sp += sign;
-
- if (base == 0)
- {
- if (sp[0] == '0')
- {
- if (sp[1] == 'x' || sp[1] == 'X')
- {
- base = 16;
- sp += 2;
- }
- else if (sp[1] == 'b' || sp[1] == 'B')
- {
- base = 2;
- sp += 2;
- }
- else
- base = 8;
- }
- else
- base = 10;
- }
-
- if (!*sp)
- {
- r->_mp_size = 0;
- return -1;
- }
- sn = strlen_c(sp);
- //dp = (unsigned char *) gmp_alloc (sn);
-
-
- value_of_a = (base > 36) ? 36 : 10;
- for (dn = 0; *sp; sp++)
- {
- unsigned digit;
-
- if (isspace_gpu ((unsigned char) *sp))
- continue;
- else if (*sp >= '0' && *sp <= '9')
- digit = *sp - '0';
- else if (*sp >= 'a' && *sp <= 'z')
- digit = *sp - 'a' + value_of_a;
- else if (*sp >= 'A' && *sp <= 'Z')
- digit = *sp - 'A' + 10;
- else
- digit = base; /* fail */
-
- if (digit >= (unsigned) base)
- {
- //gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
-
- dp[dn++] = digit;
- }
-
- if (!dn)
- {
- //gmp_free (dp, sn);
- r->_mp_size = 0;
- return -1;
- }
- bits = mpn_base_power_of_two_p (base);
-
- if (bits > 0)
- {
- alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_bits (rp, dp, dn, bits);
- }
- else
- {
- struct mpn_base_info info;
- mpn_get_base_info (&info, base);
- alloc = (dn + info.exp - 1) / info.exp;
- rp = MPZ_REALLOC (r, alloc);
- rn = mpn_set_str_other (rp, dp, dn, base, &info);
- /* Normalization, needed for all-zero input. */
- assert (rn > 0);
- rn -= rp[rn-1] == 0;
- }
- assert (rn <= alloc);
- //gmp_free (dp, sn);
-
- r->_mp_size = sign ? - rn : rn;
-
- return 0;
-}
-
-
-
-int
-mpz_init_set_str (mpz_t r, __global char *sp, int base)
-{
- mpz_init (r);
- return mpz_set_str (r, sp, base);
-}
-
-
-// Montgomery multiplication
-
-void mont_prepare(mpz_t b, mpz_t e, mpz_t m,
- mpz_t r, mpz_t r_1,
- mpz_t ni, mpz_t M, mpz_t x
- );
-
-void mont_product(mpz_t ret,
- const mpz_t a, const mpz_t b,
- const mpz_t r, const mpz_t r_1,
- const mpz_t n, const mpz_t ni
- );
-
-void mont_modexp(mpz_t ret,
- mpz_t a, mpz_t e,
- const mpz_t M,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- );
-
-void mont_finish(mpz_t ret,
- const mpz_t xx,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- );
-
-void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj);
-
-void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod);
-
-
-
-
-void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) {
-
- mpz_t two; // powj == 2^j
-
- mpz_init_set_ui(two, 2);
-
- mp_bitcnt_t j = mpz_scan1(m, 0);
-
- mpz_tdiv_q_2exp(q,m,j);
- mpz_mul_2exp(powj,two,j - 1);
-
- mpz_clear(two);
-
-}
-
-// CPU
-void mont_prepare(mpz_t b, mpz_t e, mpz_t m,
- mpz_t r, mpz_t r_1,
- mpz_t ni, mpz_t M, mpz_t x) {
-
-
- // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd)
-
- // calculate r, which must be larger than the modulo and also a power of 2
-
- mpz_t one, oo; // some helper variables
- mpz_init_set_si(one,1);
- mpz_init_set_si(oo,0);
-
- //unsigned long len = mpz_sizeinbase(m,2);
-
- unsigned long len = 2048;
-
- mpz_mul_2exp(r,one,len);
-
- mpz_set_si(one, 0);
-
-
- mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni
-
- int sgn = mpz_sgn(r_1);
-
- mpz_abs(r_1, r_1);
- mpz_abs(ni, ni);
-
- if (sgn == -1) {
- mpz_sub(ni, r, ni);
- mpz_sub(r_1, m, r_1);
- }
-
- if (mpz_cmp_ui(one, 1))
- assert(0);
-
- mpz_mul(one, r, r_1);
- mpz_mul(oo,ni,m);
-
- mpz_sub(one, one, oo); // oo must be one
-
- if (mpz_cmp_ui(one, 1))
- assert(0);
-
- mpz_mul(M, b, r);
- mpz_mod(M, M, m); // set M
-
- mpz_mod(x, r, m); // set x
-
-
-
-}
-
-// maybe GPU?
-// MARK: n MUST be an odd number
-void mont_modexp(mpz_t ret,
- mpz_t a, mpz_t e,
- const mpz_t M,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- ) {
-
- mpz_t aa,xx;
-
- mpz_init_set(aa, M);
- mpz_init_set(xx, a);
-
- int k = (int)mpz_sizeinbase(e,2);
-
- for (int i = k - 1; i >= 0; i--) {
-
- mont_product(xx, xx, xx, r, r_1, n, ni);
-
- if (mpz_tstbit(e, i))
- mont_product(xx, aa, xx, r, r_1, n, ni);
-
- }
-
- mpz_set(ret, xx);
-
-}
-
-void mont_finish(mpz_t ret,
- const mpz_t xx,
- const mpz_t n, const mpz_t ni,
- const mpz_t r, const mpz_t r_1
- ) {
-
-
- mpz_t x,one;
-
- mpz_init(x);
- mpz_init_set_ui(one, 1);
-
- mont_product(x, xx, one, r, r_1, n, ni);
-
- mpz_set(ret, x);
-
- mpz_clear(x);
- mpz_clear(one);
-
-}
-
-
-// GPU
-void mont_product(mpz_t ret,
- const mpz_t a, const mpz_t b,
- const mpz_t r, const mpz_t r_1,
- const mpz_t n, const mpz_t ni
- ) {
-
- mpz_t t,m,u;
-
- mpz_init(t);
- mpz_init(m);
- mpz_init(u);
-
-
-
- mont_mulmod(t, b, a, r);
-
- mont_mulmod(m, ni, t, r);
-
- mpz_t ab,mn;
-
- mpz_init(ab);
- mpz_init(mn);
-
- mpz_mul(ab, a, b);
- mpz_mul(mn, m, n);
-
- mpz_add(ab, ab, mn);
-
- unsigned long sz = mpz_sizeinbase(r,2) - 1;
- mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division
-
- if (mpz_cmp(u, n) >= 0)
- mpz_sub(u, u, n);
-
- mpz_set(ret, u);
-
-
-
-}
-
-// not the fastest... but it does not increase the variable sizes
-void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) {
-
- mpz_t aa, bb;
- mpz_init_set(aa, a);
- mpz_init_set(bb,b);
-
- mpz_mod(aa, aa, mod); // in case a is bigger
-
- while (mpz_cmp_ui(bb, 0) > 0) {
- if (mpz_odd_p(bb)) {
- mpz_add(res, res, aa);
- mpz_mod(res, res, mod);
- }
-
- mpz_mul_2exp(aa,aa,1);
- mpz_mod(aa, aa, mod);
- mpz_tdiv_q_2exp(bb, bb, 1);
- }
-}
-
-void printmpz(mpz_t n) {
-
- for (int i = 0; i < n->_mp_size; i++) {
-
- printf((char __constant *)"%lu", n->_mp_d[i]);
-
- }
- printf((char __constant *)"\n\n");
-
-}
-
-__kernel void montgomery(__global void *signature, __global unsigned long *s_offsets,
- __global void *exponent, __global unsigned long *e_offsets,
- __global void *modulus, __global unsigned long *m_offsets,
- __global void *base, __global unsigned long *b_offsets,
- __global unsigned long *valid,
- __global unsigned long *pks,
- unsigned long n)
-{
-
-
- int index = get_global_id(0);
-
- int pk = 0;
-
- while (1) {
- if (pks[pk] >= index)
- break;
- pk++;
- }
-
- mpz_t b,e,m,sig,res;
- mpz_init(res);
-
- mpz_set_lg((unsigned long *)b,&base[b_offsets[index]]); // this is sacrilegious really...
-
- mpz_set_lg((unsigned long *)sig,&signature[s_offsets[index]]);
-
- mpz_set_lg((unsigned long *)e,&exponent[e_offsets[pk]]);
- mpz_set_lg((unsigned long *)m,&modulus[m_offsets[pk]]); // n
-
-
- mpz_t r, r_1, ni, M, x;
- mpz_init(r);
- mpz_init(r_1);
- mpz_init(ni);
- mpz_init(M);
- mpz_init(x);
-
-
- mpz_t xx;
- mpz_init(xx);
-
- // MARK: prepare might not have to run individually on each kernel (prepare might even run on CPU)
- mont_prepare(b, e, m, r, r_1, ni, M, x);
- mont_modexp(xx, x, e, M, m, ni, r, r_1);
- mont_finish(res, xx, m, ni, r, r_1);
-//
- if (mpz_cmp(sig,res) != 0) {
-
- *valid += 1;
-
- }
-
-
-
-
-}