libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit 422f4db0f24c2630e75ff2c8c1028f6ecfdc21d5
parent e96d6ec827703c8213bc5ebc1321f2a4954166dd
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Mon,  1 Jan 2024 23:58:51 +0100

Refactor

Diffstat:
Msource/Makefile | 11+++++++++--
Dsource/gmp.c | 4627-------------------------------------------------------------------------------
Dsource/gmp.h | 310-------------------------------------------------------------------------------
Asource/gpu-verify | 0
Asource/gpu-verify.dSYM/Contents/Info.plist | 20++++++++++++++++++++
Asource/gpu-verify.dSYM/Contents/Resources/DWARF/gpu-verify | 0
Asource/gpu-verify.dSYM/Contents/Resources/Relocations/x86_64/gpu-verify.yml | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/gpuv-montg.c | 657+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/gpuv-montg.cl | 2++
Asource/gpuv-montg.h | 33+++++++++++++++++++++++++++++++++
Asource/gpuv-ref.c | 257+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/gpuv-ref.h | 16++++++++++++++++
Asource/gpuv.c | 622+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/gpuv.cl | 2++
Asource/gpuv.h | 30++++++++++++++++++++++++++++++
Msource/lib-gpu-verify.c | 15+++++++--------
Dsource/montgomery-test.c | 375-------------------------------------------------------------------------------
Dsource/montgomery-test.h | 15---------------
Dsource/montgomery.c | 431-------------------------------------------------------------------------------
Dsource/montgomery.cl | 2--
Dsource/montgomery.h | 27---------------------------
Dsource/montmodmult.c | 662-------------------------------------------------------------------------------
Dsource/montmodmult.cl | 2--
Dsource/montmodmult.h | 20--------------------
Dsource/reference-test.c | 260-------------------------------------------------------------------------------
Dsource/reference-test.h | 15---------------
Dsource/rsa-test.c | 639-------------------------------------------------------------------------------
Dsource/rsa-test.h | 33---------------------------------
Msource/util.c | 4++--
Msource/util.h | 9++-------
Mxcode/.DS_Store | 0
Rxcode/montmodmult.cl -> xcode/gpuv-montg.cl | 0
Rxcode/verify.cl -> xcode/gpuv.cl | 0
Mxcode/lib-gpu-generate/main.c | 4++--
Mxcode/lib-gpu-verify.xcodeproj/project.pbxproj | 78+++++++++++++++++++++++++++---------------------------------------------------
Mxcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate | 0
Mxcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist | 256++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Dxcode/montgomery.cl | 2954-------------------------------------------------------------------------------
38 files changed, 1885 insertions(+), 10557 deletions(-)

diff --git a/source/Makefile b/source/Makefile @@ -1,3 +1,10 @@ -all: - gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c rsa-test.c montgomery-test.c reference-test.c montmodmult.c util.c gmp.c -lgcrypt -lgmp -lOpenCL -lm +linux: + gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -lOpenCL -lm + +windows: + gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -lOpenCL -lm + +macos: + gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o gpu-verify lib-gpu-verify.c gpuv.c gpuv-ref.c gpuv-montg.c util.c -lgmp -lgcrypt -framework OpenCL -lm + diff --git a/source/gmp.c b/source/gmp.c @@ -1,4627 +0,0 @@ -/* mini-gmp, a minimalistic implementation of a GNU GMP subset. - - Contributed to the GNU project by Niels Möller - Additional functionalities and improvements by Marco Bodrato. - -Copyright 1991-1997, 1999-2022 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -/* NOTE: All functions in this file which are not declared in - mini-gmp.h are internal, and are not intended to be compatible - with GMP or with future versions of mini-gmp. */ - -/* Much of the material copied from GMP files, including: gmp-impl.h, - longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c, - mpn/generic/lshift.c, mpn/generic/mul_1.c, - mpn/generic/mul_basecase.c, mpn/generic/rshift.c, - mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c, - mpn/generic/submul_1.c. */ - -#include <assert.h> -#include <ctype.h> -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "gmp.h" - -#if !defined(MINI_GMP_DONT_USE_FLOAT_H) -#include <float.h> -#endif - - -/* Macros */ -#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) - -#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) -#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) - -#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) -#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) - -#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) -#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) - -#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) -#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) - -#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) -#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) - -#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) - -#if defined(DBL_MANT_DIG) && FLT_RADIX == 2 -#define GMP_DBL_MANT_BITS DBL_MANT_DIG -#else -#define GMP_DBL_MANT_BITS (53) -#endif - -/* Return non-zero if xp,xsize and yp,ysize overlap. - If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no - overlap. If both these are false, there's an overlap. 
*/ -#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \ - ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) - -#define gmp_assert_nocarry(x) do { \ - mp_limb_t __cy = (x); \ - assert (__cy == 0); \ - (void) (__cy); \ - } while (0) - -#define gmp_clz(count, x) do { \ - mp_limb_t __clz_x = (x); \ - unsigned __clz_c = 0; \ - int LOCAL_SHIFT_BITS = 8; \ - if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ - for (; \ - (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ - __clz_c += 8) \ - { __clz_x <<= LOCAL_SHIFT_BITS; } \ - for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ - __clz_x <<= 1; \ - (count) = __clz_c; \ - } while (0) - -#define gmp_ctz(count, x) do { \ - mp_limb_t __ctz_x = (x); \ - unsigned __ctz_c = 0; \ - gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \ - (count) = GMP_LIMB_BITS - 1 - __ctz_c; \ - } while (0) - -#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) + (bl); \ - (sh) = (ah) + (bh) + (__x < (al)); \ - (sl) = __x; \ - } while (0) - -#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) - (bl); \ - (sh) = (ah) - (bh) - ((al) < (bl)); \ - (sl) = __x; \ - } while (0) - -#define gmp_umul_ppmm(w1, w0, u, v) \ - do { \ - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ - if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned int __ww = (unsigned int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned long int __ww = (unsigned long int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else { \ - mp_limb_t __x0, __x1, __x2, __x3; \ - unsigned __ul, __vl, __uh, __vh; \ - mp_limb_t __u = (u), __v = (v); \ - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \ - \ - __ul = __u & GMP_LLIMB_MASK; \ - __uh = __u >> (GMP_LIMB_BITS / 2); \ - __vl = __v & GMP_LLIMB_MASK; \ - __vh = __v >> (GMP_LIMB_BITS / 2); \ 
- \ - __x0 = (mp_limb_t) __ul * __vl; \ - __x1 = (mp_limb_t) __ul * __vh; \ - __x2 = (mp_limb_t) __uh * __vl; \ - __x3 = (mp_limb_t) __uh * __vh; \ - \ - __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \ - \ - (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ - (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ - } \ - } while (0) - -/* If mp_limb_t is of size smaller than int, plain u*v implies - automatic promotion to *signed* int, and then multiply may overflow - and cause undefined behavior. Explicitly cast to unsigned int for - that case. */ -#define gmp_umullo_limb(u, v) \ - ((sizeof(mp_limb_t) >= sizeof(int)) ? (u)*(v) : (unsigned int)(u) * (v)) - -#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ - do { \ - mp_limb_t _qh, _ql, _r, _mask; \ - gmp_umul_ppmm (_qh, _ql, (nh), (di)); \ - gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ - _r = (nl) - gmp_umullo_limb (_qh, (d)); \ - _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ - _qh += _mask; \ - _r += _mask & (d); \ - if (_r >= (d)) \ - { \ - _r -= (d); \ - _qh++; \ - } \ - \ - (r) = _r; \ - (q) = _qh; \ - } while (0) - -#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ - do { \ - mp_limb_t _q0, _t1, _t0, _mask; \ - gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \ - gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ - \ - /* Compute the two most significant limbs of n - q'd */ \ - (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ - gmp_umul_ppmm (_t1, _t0, (d0), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ - (q)++; \ - \ - /* Conditionally adjust q and the remainders */ \ - _mask = - (mp_limb_t) ((r1) >= _q0); \ - (q) += _mask; \ - gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ - if ((r1) >= (d1)) \ - { \ - if 
((r1) > (d1) || (r0) >= (d0)) \ - { \ - (q)++; \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ - } \ - } \ - } while (0) - -/* Swap macros. */ -#define MP_LIMB_T_SWAP(x, y) \ - do { \ - mp_limb_t __mp_limb_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_limb_t_swap__tmp; \ - } while (0) -#define MP_SIZE_T_SWAP(x, y) \ - do { \ - mp_size_t __mp_size_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_size_t_swap__tmp; \ - } while (0) -#define MP_BITCNT_T_SWAP(x,y) \ - do { \ - mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_bitcnt_t_swap__tmp; \ - } while (0) -#define MP_PTR_SWAP(x, y) \ - do { \ - mp_ptr __mp_ptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_ptr_swap__tmp; \ - } while (0) -#define MP_SRCPTR_SWAP(x, y) \ - do { \ - mp_srcptr __mp_srcptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_srcptr_swap__tmp; \ - } while (0) - -#define MPN_PTR_SWAP(xp,xs, yp,ys) \ - do { \ - MP_PTR_SWAP (xp, yp); \ - MP_SIZE_T_SWAP (xs, ys); \ - } while(0) -#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \ - do { \ - MP_SRCPTR_SWAP (xp, yp); \ - MP_SIZE_T_SWAP (xs, ys); \ - } while(0) - -#define MPZ_PTR_SWAP(x, y) \ - do { \ - mpz_ptr __mpz_ptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_ptr_swap__tmp; \ - } while (0) -#define MPZ_SRCPTR_SWAP(x, y) \ - do { \ - mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_srcptr_swap__tmp; \ - } while (0) - -const int mp_bits_per_limb = GMP_LIMB_BITS; - - -/* Memory allocation and other helper functions. 
*/ -static void -gmp_die (const char *msg) -{ - fprintf (stderr, "%s\n", msg); - abort(); -} - -static void * -gmp_default_alloc (size_t size) -{ - void *p; - - assert (size > 0); - - p = malloc (size); - if (!p) - gmp_die("gmp_default_alloc: Virtual memory exhausted."); - - return p; -} - -static void * -gmp_default_realloc (void *old, size_t unused_old_size, size_t new_size) -{ - void * p; - - p = realloc (old, new_size); - - if (!p) - gmp_die("gmp_default_realloc: Virtual memory exhausted."); - - return p; -} - -static void -gmp_default_free (void *p, size_t unused_size) -{ - free (p); -} - -static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc; -static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc; -static void (*gmp_free_func) (void *, size_t) = gmp_default_free; - -void -mp_get_memory_functions (void *(**alloc_func) (size_t), - void *(**realloc_func) (void *, size_t, size_t), - void (**free_func) (void *, size_t)) -{ - if (alloc_func) - *alloc_func = gmp_allocate_func; - - if (realloc_func) - *realloc_func = gmp_reallocate_func; - - if (free_func) - *free_func = gmp_free_func; -} - -void -mp_set_memory_functions (void *(*alloc_func) (size_t), - void *(*realloc_func) (void *, size_t, size_t), - void (*free_func) (void *, size_t)) -{ - if (!alloc_func) - alloc_func = gmp_default_alloc; - if (!realloc_func) - realloc_func = gmp_default_realloc; - if (!free_func) - free_func = gmp_default_free; - - gmp_allocate_func = alloc_func; - gmp_reallocate_func = realloc_func; - gmp_free_func = free_func; -} - -#define gmp_alloc(size) ((*gmp_allocate_func)((size))) -#define gmp_free(p, size) ((*gmp_free_func) ((p), (size))) -#define gmp_realloc(ptr, old_size, size) ((*gmp_reallocate_func)(ptr, old_size, size)) - -static mp_ptr -gmp_alloc_limbs (mp_size_t size) -{ - return (mp_ptr) gmp_alloc (size * sizeof (mp_limb_t)); -} - -static mp_ptr -gmp_realloc_limbs (mp_ptr old, mp_size_t old_size, mp_size_t size) -{ - assert (size > 0); - 
return (mp_ptr) gmp_realloc (old, old_size * sizeof (mp_limb_t), size * sizeof (mp_limb_t)); -} - -static void -gmp_free_limbs (mp_ptr old, mp_size_t size) -{ - gmp_free (old, size * sizeof (mp_limb_t)); -} - - -/* MPN interface */ - -void -mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - mp_size_t i; - for (i = 0; i < n; i++) - d[i] = s[i]; -} - -void -mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - while (--n >= 0) - d[n] = s[n]; -} - -int -mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - while (--n >= 0) - { - if (ap[n] != bp[n]) - return ap[n] > bp[n] ? 1 : -1; - } - return 0; -} - -static int -mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - if (an != bn) - return an < bn ? -1 : 1; - else - return mpn_cmp (ap, bp, an); -} - -static mp_size_t -mpn_normalized_size (mp_srcptr xp, mp_size_t n) -{ - while (n > 0 && xp[n-1] == 0) - --n; - return n; -} - -int -mpn_zero_p(mp_srcptr rp, mp_size_t n) -{ - return mpn_normalized_size (rp, n) == 0; -} - -void -mpn_zero (mp_ptr rp, mp_size_t n) -{ - while (--n >= 0) - rp[n] = 0; -} - -mp_limb_t -mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - i = 0; - do - { - mp_limb_t r = ap[i] + b; - /* Carry out */ - b = (r < b); - rp[i] = r; - } - while (++i < n); - - return b; -} - -mp_limb_t -mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b, r; - a = ap[i]; b = bp[i]; - r = a + cy; - cy = (r < cy); - r += b; - cy += (r < b); - rp[i] = r; - } - return cy; -} - -mp_limb_t -mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_add_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - -mp_limb_t -mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - - i = 0; - do - { - 
mp_limb_t a = ap[i]; - /* Carry out */ - mp_limb_t cy = a < b; - rp[i] = a - b; - b = cy; - } - while (++i < n); - - return b; -} - -mp_limb_t -mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b; - a = ap[i]; b = bp[i]; - b += cy; - cy = (b < cy); - cy += (a < b); - rp[i] = a - b; - } - return cy; -} - -mp_limb_t -mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_sub_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - -mp_limb_t -mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -mp_limb_t -mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl + lpl; - cl += lpl < rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -mp_limb_t -mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl - lpl; - cl += lpl > rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -mp_limb_t -mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) -{ - assert (un >= vn); - assert (vn >= 1); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); - - /* We first multiply by the low order limb. 
This result can be - stored, not added, to rp. We also avoid a loop for zeroing this - way. */ - - rp[un] = mpn_mul_1 (rp, up, un, vp[0]); - - /* Now accumulate the product of up[] and the next higher limb from - vp[]. */ - - while (--vn >= 1) - { - rp += 1, vp += 1; - rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); - } - return rp[un]; -} - -void -mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mpn_mul (rp, ap, n, bp, n); -} - -void -mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n) -{ - mpn_mul (rp, ap, n, ap, n); -} - -mp_limb_t -mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - up += n; - rp += n; - - tnc = GMP_LIMB_BITS - cnt; - low_limb = *--up; - retval = low_limb >> tnc; - high_limb = (low_limb << cnt); - - while (--n != 0) - { - low_limb = *--up; - *--rp = high_limb | (low_limb >> tnc); - high_limb = (low_limb << cnt); - } - *--rp = high_limb; - - return retval; -} - -mp_limb_t -mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - tnc = GMP_LIMB_BITS - cnt; - high_limb = *up++; - retval = (high_limb << tnc); - low_limb = high_limb >> cnt; - - while (--n != 0) - { - high_limb = *up++; - *rp++ = low_limb | (high_limb << tnc); - low_limb = high_limb >> cnt; - } - *rp = low_limb; - - return retval; -} - -static mp_bitcnt_t -mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux) -{ - unsigned cnt; - - assert (ux == 0 || ux == GMP_LIMB_MAX); - assert (0 <= i && i <= un ); - - while (limb == 0) - { - i++; - if (i == un) - return (ux == 0 ? 
~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); - limb = ux ^ up[i]; - } - gmp_ctz (cnt, limb); - return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; -} - -mp_bitcnt_t -mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) -{ - mp_size_t i; - i = bit / GMP_LIMB_BITS; - - return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), - i, ptr, i, 0); -} - -mp_bitcnt_t -mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit) -{ - mp_size_t i; - i = bit / GMP_LIMB_BITS; - - return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), - i, ptr, i, GMP_LIMB_MAX); -} - -void -mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n) -{ - while (--n >= 0) - *rp++ = ~ *up++; -} - -mp_limb_t -mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n) -{ - while (*up == 0) - { - *rp = 0; - if (!--n) - return 0; - ++up; ++rp; - } - *rp = - *up; - mpn_com (++rp, ++up, --n); - return 1; -} - - -/* MPN division interface. */ - -/* The 3/2 inverse is defined as - - m = floor( (B^3-1) / (B u1 + u0)) - B -*/ -mp_limb_t -mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) -{ - mp_limb_t r, m; - - { - mp_limb_t p, ql; - unsigned ul, uh, qh; - - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); - /* For notation, let b denote the half-limb base, so that B = b^2. - Split u1 = b uh + ul. */ - ul = u1 & GMP_LLIMB_MASK; - uh = u1 >> (GMP_LIMB_BITS / 2); - - /* Approximation of the high half of quotient. Differs from the 2/1 - inverse of the half limb uh, since we have already subtracted - u0. */ - qh = (u1 ^ GMP_LIMB_MAX) / uh; - - /* Adjust to get a half-limb 3/2 inverse, i.e., we want - - qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u - = floor( (b (~u) + b-1) / u), - - and the remainder - - r = b (~u) + b-1 - qh (b uh + ul) - = b (~u - qh uh) + b-1 - qh ul - - Subtraction of qh ul may underflow, which implies adjustments. - But by normalization, 2 u >= B > qh ul, so we need to adjust by - at most 2. 
- */ - - r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; - - p = (mp_limb_t) qh * ul; - /* Adjustment steps taken from udiv_qrnnd_c */ - if (r < p) - { - qh--; - r += u1; - if (r >= u1) /* i.e. we didn't get carry when adding to r */ - if (r < p) - { - qh--; - r += u1; - } - } - r -= p; - - /* Low half of the quotient is - - ql = floor ( (b r + b-1) / u1). - - This is a 3/2 division (on half-limbs), for which qh is a - suitable inverse. */ - - p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; - /* Unlike full-limb 3/2, we can add 1 without overflow. For this to - work, it is essential that ql is a full mp_limb_t. */ - ql = (p >> (GMP_LIMB_BITS / 2)) + 1; - - /* By the 3/2 trick, we don't need the high half limb. */ - r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; - - if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) - { - ql--; - r += u1; - } - m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; - if (r >= u1) - { - m++; - r -= u1; - } - } - - /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a - 3/2 inverse. */ - if (u0 > 0) - { - mp_limb_t th, tl; - r = ~r; - r += u0; - if (r < u0) - { - m--; - if (r >= u1) - { - m--; - r -= u1; - } - r -= u1; - } - gmp_umul_ppmm (th, tl, u0, m); - r += th; - if (r < th) - { - m--; - m -= ((r > u1) | ((r == u1) & (tl > u0))); - } - } - - return m; -} - -struct gmp_div_inverse -{ - /* Normalization shift count. */ - unsigned shift; - /* Normalized divisor (d0 unused for mpn_div_qr_1) */ - mp_limb_t d1, d0; - /* Inverse, for 2/1 or 3/2. 
*/ - mp_limb_t di; -}; - -static void -mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) -{ - unsigned shift; - - assert (d > 0); - gmp_clz (shift, d); - inv->shift = shift; - inv->d1 = d << shift; - inv->di = mpn_invert_limb (inv->d1); -} - -static void -mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0) -{ - unsigned shift; - - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 <<= shift; - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); -} - -static void -mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn) -{ - assert (dn > 0); - - if (dn == 1) - mpn_div_qr_1_invert (inv, dp[0]); - else if (dn == 2) - mpn_div_qr_2_invert (inv, dp[1], dp[0]); - else - { - unsigned shift; - mp_limb_t d1, d0; - - d1 = dp[dn-1]; - d0 = dp[dn-2]; - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift)); - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); - } -} - -/* Not matching current public gmp interface, rather corresponding to - the sbpi1_div_* functions. */ -static mp_limb_t -mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - mp_limb_t d, di; - mp_limb_t r; - mp_ptr tp = NULL; - mp_size_t tn = 0; - - if (inv->shift > 0) - { - /* Shift, reusing qp area if possible. In-place shift if qp == np. 
*/ - tp = qp; - if (!tp) - { - tn = nn; - tp = gmp_alloc_limbs (tn); - } - r = mpn_lshift (tp, np, nn, inv->shift); - np = tp; - } - else - r = 0; - - d = inv->d1; - di = inv->di; - while (--nn >= 0) - { - mp_limb_t q; - - gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); - if (qp) - qp[nn] = q; - } - if (tn) - gmp_free_limbs (tp, tn); - - return r >> inv->shift; -} - -static void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - unsigned shift; - mp_size_t i; - mp_limb_t d1, d0, di, r1, r0; - - assert (nn >= 2); - shift = inv->shift; - d1 = inv->d1; - d0 = inv->d0; - di = inv->di; - - if (shift > 0) - r1 = mpn_lshift (np, np, nn, shift); - else - r1 = 0; - - r0 = np[nn - 1]; - - i = nn - 2; - do - { - mp_limb_t n0, q; - n0 = np[i]; - gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - if (shift > 0) - { - assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); - r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); - r1 >>= shift; - } - - np[1] = r1; - np[0] = r0; -} - -static void -mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv) -{ - mp_size_t i; - - mp_limb_t d1, d0; - mp_limb_t cy, cy1; - mp_limb_t q; - - assert (dn > 2); - assert (nn >= dn); - - d1 = dp[dn - 1]; - d0 = dp[dn - 2]; - - assert ((d1 & GMP_LIMB_HIGHBIT) != 0); - /* Iteration variable is the index of the q limb. 
- * - * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]> - * by <d1, d0, dp[dn-3], ..., dp[0] > - */ - - i = nn - dn; - do - { - mp_limb_t n0 = np[dn-1+i]; - - if (n1 == d1 && n0 == d0) - { - q = GMP_LIMB_MAX; - mpn_submul_1 (np+i, dp, dn, q); - n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */ - } - else - { - gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv); - - cy = mpn_submul_1 (np + i, dp, dn-2, q); - - cy1 = n0 < cy; - n0 = n0 - cy; - cy = n1 < cy1; - n1 = n1 - cy1; - np[dn-2+i] = n0; - - if (cy != 0) - { - n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); - q--; - } - } - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - np[dn - 1] = n1; -} - -static void -mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv) -{ - assert (dn > 0); - assert (nn >= dn); - - if (dn == 1) - np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv); - else if (dn == 2) - mpn_div_qr_2_preinv (qp, np, nn, inv); - else - { - mp_limb_t nh; - unsigned shift; - - assert (inv->d1 == dp[dn-1]); - assert (inv->d0 == dp[dn-2]); - assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); - - shift = inv->shift; - if (shift > 0) - nh = mpn_lshift (np, np, nn, shift); - else - nh = 0; - - mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); - - if (shift > 0) - gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); - } -} - -static void -mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) -{ - struct gmp_div_inverse inv; - mp_ptr tp = NULL; - - assert (dn > 0); - assert (nn >= dn); - - mpn_div_qr_invert (&inv, dp, dn); - if (dn > 2 && inv.shift > 0) - { - tp = gmp_alloc_limbs (dn); - gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift)); - dp = tp; - } - mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv); - if (tp) - gmp_free_limbs (tp, dn); -} - - -/* MPN base conversion. 
*/ -static unsigned -mpn_base_power_of_two_p (unsigned b) -{ - switch (b) - { - case 2: return 1; - case 4: return 2; - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - case 128: return 7; - case 256: return 8; - default: return 0; - } -} - -struct mpn_base_info -{ - /* bb is the largest power of the base which fits in one limb, and - exp is the corresponding exponent. */ - unsigned exp; - mp_limb_t bb; -}; - -static void -mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) -{ - mp_limb_t m; - mp_limb_t p; - unsigned exp; - - m = GMP_LIMB_MAX / b; - for (exp = 1, p = b; p <= m; exp++) - p *= b; - - info->exp = exp; - info->bb = p; -} - -static mp_bitcnt_t -mpn_limb_size_in_base_2 (mp_limb_t u) -{ - unsigned shift; - - assert (u > 0); - gmp_clz (shift, u); - return GMP_LIMB_BITS - shift; -} - -static size_t -mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un) -{ - unsigned char mask; - size_t sn, j; - mp_size_t i; - unsigned shift; - - sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]) - + bits - 1) / bits; - - mask = (1U << bits) - 1; - - for (i = 0, j = sn, shift = 0; j-- > 0;) - { - unsigned char digit = up[i] >> shift; - - shift += bits; - - if (shift >= GMP_LIMB_BITS && ++i < un) - { - shift -= GMP_LIMB_BITS; - digit |= up[i] << (bits - shift); - } - sp[j] = digit & mask; - } - return sn; -} - -/* We generate digits from the least significant end, and reverse at - the end. 
*/ -static size_t -mpn_limb_get_str (unsigned char *sp, mp_limb_t w, - const struct gmp_div_inverse *binv) -{ - mp_size_t i; - for (i = 0; w > 0; i++) - { - mp_limb_t h, l, r; - - h = w >> (GMP_LIMB_BITS - binv->shift); - l = w << binv->shift; - - gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di); - assert ((r & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - binv->shift))) == 0); - r >>= binv->shift; - - sp[i] = r; - } - return i; -} - -static size_t -mpn_get_str_other (unsigned char *sp, - int base, const struct mpn_base_info *info, - mp_ptr up, mp_size_t un) -{ - struct gmp_div_inverse binv; - size_t sn; - size_t i; - - mpn_div_qr_1_invert (&binv, base); - - sn = 0; - - if (un > 1) - { - struct gmp_div_inverse bbinv; - mpn_div_qr_1_invert (&bbinv, info->bb); - - do - { - mp_limb_t w; - size_t done; - w = mpn_div_qr_1_preinv (up, up, un, &bbinv); - un -= (up[un-1] == 0); - done = mpn_limb_get_str (sp + sn, w, &binv); - - for (sn += done; done < info->exp; done++) - sp[sn++] = 0; - } - while (un > 1); - } - sn += mpn_limb_get_str (sp + sn, up[0], &binv); - - /* Reverse order */ - for (i = 0; 2*i + 1 < sn; i++) - { - unsigned char t = sp[i]; - sp[i] = sp[sn - i - 1]; - sp[sn - i - 1] = t; - } - - return sn; -} - -size_t -mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un) -{ - unsigned bits; - - assert (un > 0); - assert (up[un-1] > 0); - - bits = mpn_base_power_of_two_p (base); - if (bits) - return mpn_get_str_bits (sp, bits, up, un); - else - { - struct mpn_base_info info; - - mpn_get_base_info (&info, base); - return mpn_get_str_other (sp, base, &info, up, un); - } -} - -static mp_size_t -mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits) -{ - mp_size_t rn; - mp_limb_t limb; - unsigned shift; - - for (limb = 0, rn = 0, shift = 0; sn-- > 0; ) - { - limb |= (mp_limb_t) sp[sn] << shift; - shift += bits; - if (shift >= GMP_LIMB_BITS) - { - shift -= GMP_LIMB_BITS; - rp[rn++] = limb; - /* Next line is correct also if shift == 0, - 
bits == 8, and mp_limb_t == unsigned char. */ - limb = (unsigned int) sp[sn] >> (bits - shift); - } - } - if (limb != 0) - rp[rn++] = limb; - else - rn = mpn_normalized_size (rp, rn); - return rn; -} - -/* Result is usually normalized, except for all-zero input, in which - case a single zero limb is written at *RP, and 1 is returned. */ -static mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info) -{ - mp_size_t rn; - mp_limb_t w; - unsigned k; - size_t j; - - assert (sn > 0); - - k = 1 + (sn - 1) % info->exp; - - j = 0; - w = sp[j++]; - while (--k != 0) - w = w * b + sp[j++]; - - rp[0] = w; - - for (rn = 1; j < sn;) - { - mp_limb_t cy; - - w = sp[j++]; - for (k = 1; k < info->exp; k++) - w = w * b + sp[j++]; - - cy = mpn_mul_1 (rp, rp, rn, info->bb); - cy += mpn_add_1 (rp, rp, rn, w); - if (cy > 0) - rp[rn++] = cy; - } - assert (j == sn); - - return rn; -} - -mp_size_t -mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base) -{ - unsigned bits; - - if (sn == 0) - return 0; - - bits = mpn_base_power_of_two_p (base); - if (bits) - return mpn_set_str_bits (rp, sp, sn, bits); - else - { - struct mpn_base_info info; - - mpn_get_base_info (&info, base); - return mpn_set_str_other (rp, sp, sn, base, &info); - } -} - - -/* MPZ interface */ -void -mpz_init (mpz_t r) -{ - static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; - - r->_mp_alloc = 0; - r->_mp_size = 0; - r->_mp_d = (mp_ptr) &dummy_limb; -} - -/* The utility of this function is a bit limited, since many functions - assigns the result variable using mpz_swap. 
*/ -void -mpz_init2 (mpz_t r, mp_bitcnt_t bits) -{ - mp_size_t rn; - - bits -= (bits != 0); /* Round down, except if 0 */ - rn = 1 + bits / GMP_LIMB_BITS; - - r->_mp_alloc = rn; - r->_mp_size = 0; - r->_mp_d = gmp_alloc_limbs (rn); -} - -void -mpz_clear (mpz_t r) -{ - if (r->_mp_alloc) - gmp_free_limbs (r->_mp_d, r->_mp_alloc); -} - -static mp_ptr -mpz_realloc (mpz_t r, mp_size_t size) -{ - size = GMP_MAX (size, 1); - - if (r->_mp_alloc) - r->_mp_d = gmp_realloc_limbs (r->_mp_d, r->_mp_alloc, size); - else - r->_mp_d = gmp_alloc_limbs (size); - r->_mp_alloc = size; - - if (GMP_ABS (r->_mp_size) > size) - r->_mp_size = 0; - - return r->_mp_d; -} - -/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */ -#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc \ - ? mpz_realloc(z,n) \ - : (z)->_mp_d) - -/* MPZ assignment and basic conversions. */ -void -mpz_set_si (mpz_t r, signed long int x) -{ - if (x >= 0) - mpz_set_ui (r, x); - else /* (x < 0) */ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); - mpz_neg (r, r); - } - else - { - r->_mp_size = -1; - MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); - } -} - -void -mpz_set_ui (mpz_t r, unsigned long int x) -{ - if (x > 0) - { - r->_mp_size = 1; - MPZ_REALLOC (r, 1)[0] = x; - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - while (x >>= LOCAL_GMP_LIMB_BITS) - { - ++ r->_mp_size; - MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; - } - } - } - else - r->_mp_size = 0; -} - -void -mpz_set (mpz_t r, const mpz_t x) -{ - /* Allow the NOP r == x */ - if (r != x) - { - mp_size_t n; - mp_ptr rp; - - n = GMP_ABS (x->_mp_size); - rp = MPZ_REALLOC (r, n); - - mpn_copyi (rp, x->_mp_d, n); - r->_mp_size = x->_mp_size; - } -} - -void -mpz_init_set_si (mpz_t r, signed long int x) -{ - mpz_init (r); - mpz_set_si (r, x); -} - -void -mpz_init_set_ui (mpz_t r, unsigned long int x) -{ - mpz_init (r); - mpz_set_ui (r, x); -} - -void 
-mpz_init_set (mpz_t r, const mpz_t x) -{ - mpz_init (r); - mpz_set (r, x); -} - -int -mpz_fits_slong_p (const mpz_t u) -{ - return mpz_cmp_si (u, LONG_MAX) <= 0 && mpz_cmp_si (u, LONG_MIN) >= 0; -} - -static int -mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) -{ - int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS; - mp_limb_t ulongrem = 0; - - if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0) - ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1; - - return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); -} - -int -mpz_fits_ulong_p (const mpz_t u) -{ - mp_size_t us = u->_mp_size; - - return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us); -} - -int -mpz_fits_sint_p (const mpz_t u) -{ - return mpz_cmp_si (u, INT_MAX) <= 0 && mpz_cmp_si (u, INT_MIN) >= 0; -} - -int -mpz_fits_uint_p (const mpz_t u) -{ - return u->_mp_size >= 0 && mpz_cmpabs_ui (u, UINT_MAX) <= 0; -} - -int -mpz_fits_sshort_p (const mpz_t u) -{ - return mpz_cmp_si (u, SHRT_MAX) <= 0 && mpz_cmp_si (u, SHRT_MIN) >= 0; -} - -int -mpz_fits_ushort_p (const mpz_t u) -{ - return u->_mp_size >= 0 && mpz_cmpabs_ui (u, USHRT_MAX) <= 0; -} - -long int -mpz_get_si (const mpz_t u) -{ - unsigned long r = mpz_get_ui (u); - unsigned long c = -LONG_MAX - LONG_MIN; - - if (u->_mp_size < 0) - /* This expression is necessary to properly handle -LONG_MIN */ - return -(long) c - (long) ((r - c) & LONG_MAX); - else - return (long) (r & LONG_MAX); -} - -unsigned long int -mpz_get_ui (const mpz_t u) -{ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - unsigned long r = 0; - mp_size_t n = GMP_ABS (u->_mp_size); - n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); - while (--n >= 0) - r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; - return r; - } - - return u->_mp_size == 0 ? 
0 : u->_mp_d[0]; -} - -size_t -mpz_size (const mpz_t u) -{ - return GMP_ABS (u->_mp_size); -} - -mp_limb_t -mpz_getlimbn (const mpz_t u, mp_size_t n) -{ - if (n >= 0 && n < GMP_ABS (u->_mp_size)) - return u->_mp_d[n]; - else - return 0; -} - -void -mpz_realloc2 (mpz_t x, mp_bitcnt_t n) -{ - mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS); -} - -mp_srcptr -mpz_limbs_read (mpz_srcptr x) -{ - return x->_mp_d; -} - -mp_ptr -mpz_limbs_modify (mpz_t x, mp_size_t n) -{ - assert (n > 0); - return MPZ_REALLOC (x, n); -} - -mp_ptr -mpz_limbs_write (mpz_t x, mp_size_t n) -{ - return mpz_limbs_modify (x, n); -} - -void -mpz_limbs_finish (mpz_t x, mp_size_t xs) -{ - mp_size_t xn; - xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs)); - x->_mp_size = xs < 0 ? -xn : xn; -} - -static mpz_srcptr -mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs) -{ - x->_mp_alloc = 0; - x->_mp_d = (mp_ptr) xp; - x->_mp_size = xs; - return x; -} - -mpz_srcptr -mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs) -{ - mpz_roinit_normal_n (x, xp, xs); - mpz_limbs_finish (x, xs); - return x; -} - - -/* Conversions and comparison to double. */ -void -mpz_set_d (mpz_t r, double x) -{ - int sign; - mp_ptr rp; - mp_size_t rn, i; - double B; - double Bi; - mp_limb_t f; - - /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is - zero or infinity. */ - if (x != x || x == x * 0.5) - { - r->_mp_size = 0; - return; - } - - sign = x < 0.0 ; - if (sign) - x = - x; - - if (x < 1.0) - { - r->_mp_size = 0; - return; - } - B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); - Bi = 1.0 / B; - for (rn = 1; x >= B; rn++) - x *= Bi; - - rp = MPZ_REALLOC (r, rn); - - f = (mp_limb_t) x; - x -= f; - assert (x < 1.0); - i = rn-1; - rp[i] = f; - while (--i >= 0) - { - x = B * x; - f = (mp_limb_t) x; - x -= f; - assert (x < 1.0); - rp[i] = f; - } - - r->_mp_size = sign ? 
- rn : rn; -} - -void -mpz_init_set_d (mpz_t r, double x) -{ - mpz_init (r); - mpz_set_d (r, x); -} - -double -mpz_get_d (const mpz_t u) -{ - int m; - mp_limb_t l; - mp_size_t un; - double x; - double B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); - - un = GMP_ABS (u->_mp_size); - - if (un == 0) - return 0.0; - - l = u->_mp_d[--un]; - gmp_clz (m, l); - m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS; - if (m < 0) - l &= GMP_LIMB_MAX << -m; - - for (x = l; --un >= 0;) - { - x = B*x; - if (m > 0) { - l = u->_mp_d[un]; - m -= GMP_LIMB_BITS; - if (m < 0) - l &= GMP_LIMB_MAX << -m; - x += l; - } - } - - if (u->_mp_size < 0) - x = -x; - - return x; -} - -int -mpz_cmpabs_d (const mpz_t x, double d) -{ - mp_size_t xn; - double B, Bi; - mp_size_t i; - - xn = x->_mp_size; - d = GMP_ABS (d); - - if (xn != 0) - { - xn = GMP_ABS (xn); - - B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); - Bi = 1.0 / B; - - /* Scale d so it can be compared with the top limb. */ - for (i = 1; i < xn; i++) - d *= Bi; - - if (d >= B) - return -1; - - /* Compare floor(d) to top limb, subtract and cancel when equal. */ - for (i = xn; i-- > 0;) - { - mp_limb_t f, xl; - - f = (mp_limb_t) d; - xl = x->_mp_d[i]; - if (xl > f) - return 1; - else if (xl < f) - return -1; - d = B * (d - f); - } - } - return - (d > 0.0); -} - -int -mpz_cmp_d (const mpz_t x, double d) -{ - if (x->_mp_size < 0) - { - if (d >= 0.0) - return -1; - else - return -mpz_cmpabs_d (x, d); - } - else - { - if (d < 0.0) - return 1; - else - return mpz_cmpabs_d (x, d); - } -} - - -/* MPZ comparisons and the like. 
*/ -int -mpz_sgn (const mpz_t u) -{ - return GMP_CMP (u->_mp_size, 0); -} - -int -mpz_cmp_si (const mpz_t u, long v) -{ - mp_size_t usize = u->_mp_size; - - if (v >= 0) - return mpz_cmp_ui (u, v); - else if (usize >= 0) - return 1; - else - return - mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, v)); -} - -int -mpz_cmp_ui (const mpz_t u, unsigned long v) -{ - mp_size_t usize = u->_mp_size; - - if (usize < 0) - return -1; - else - return mpz_cmpabs_ui (u, v); -} - -int -mpz_cmp (const mpz_t a, const mpz_t b) -{ - mp_size_t asize = a->_mp_size; - mp_size_t bsize = b->_mp_size; - - if (asize != bsize) - return (asize < bsize) ? -1 : 1; - else if (asize >= 0) - return mpn_cmp (a->_mp_d, b->_mp_d, asize); - else - return mpn_cmp (b->_mp_d, a->_mp_d, -asize); -} - -int -mpz_cmpabs_ui (const mpz_t u, unsigned long v) -{ - mp_size_t un = GMP_ABS (u->_mp_size); - - if (! mpn_absfits_ulong_p (u->_mp_d, un)) - return 1; - else - { - unsigned long uu = mpz_get_ui (u); - return GMP_CMP(uu, v); - } -} - -int -mpz_cmpabs (const mpz_t u, const mpz_t v) -{ - return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size), - v->_mp_d, GMP_ABS (v->_mp_size)); -} - -void -mpz_abs (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = GMP_ABS (r->_mp_size); -} - -void -mpz_neg (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = -r->_mp_size; -} - -void -mpz_swap (mpz_t u, mpz_t v) -{ - MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); - MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); -} - - -/* MPZ addition and subtraction */ - - -void -mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_t bb; - mpz_init_set_ui (bb, b); - mpz_add (r, a, bb); - mpz_clear (bb); -} - -void -mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_ui_sub (r, b, a); - mpz_neg (r, r); -} - -void -mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) -{ - mpz_neg (r, b); - mpz_add_ui (r, r, a); -} - -static mp_size_t -mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - 
mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - mp_ptr rp; - mp_limb_t cy; - - if (an < bn) - { - MPZ_SRCPTR_SWAP (a, b); - MP_SIZE_T_SWAP (an, bn); - } - - rp = MPZ_REALLOC (r, an + 1); - cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); - - rp[an] = cy; - - return an + cy; -} - -static mp_size_t -mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - int cmp; - mp_ptr rp; - - cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn); - if (cmp > 0) - { - rp = MPZ_REALLOC (r, an); - gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); - return mpn_normalized_size (rp, an); - } - else if (cmp < 0) - { - rp = MPZ_REALLOC (r, bn); - gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); - return -mpn_normalized_size (rp, bn); - } - else - return 0; -} - -void -mpz_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_add (r, a, b); - else - rn = mpz_abs_sub (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? rn : - rn; -} - -void -mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_sub (r, a, b); - else - rn = mpz_abs_add (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? 
rn : - rn; -} - - -/* MPZ multiplication */ -void -mpz_mul_si (mpz_t r, const mpz_t u, long int v) -{ - if (v < 0) - { - mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v)); - mpz_neg (r, r); - } - else - mpz_mul_ui (r, u, v); -} - -void -mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v) -{ - mpz_t vv; - mpz_init_set_ui (vv, v); - mpz_mul (r, u, vv); - mpz_clear (vv); - return; -} - -void -mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) -{ - int sign; - mp_size_t un, vn, rn; - mpz_t t; - mp_ptr tp; - - un = u->_mp_size; - vn = v->_mp_size; - - if (un == 0 || vn == 0) - { - r->_mp_size = 0; - return; - } - - sign = (un ^ vn) < 0; - - un = GMP_ABS (un); - vn = GMP_ABS (vn); - - mpz_init2 (t, (un + vn) * GMP_LIMB_BITS); - - tp = t->_mp_d; - if (un >= vn) - mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn); - else - mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un); - - rn = un + vn; - rn -= tp[rn-1] == 0; - - t->_mp_size = sign ? - rn : rn; - mpz_swap (r, t); - mpz_clear (t); -} - -void -mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) -{ - mp_size_t un, rn; - mp_size_t limbs; - unsigned shift; - mp_ptr rp; - - un = GMP_ABS (u->_mp_size); - if (un == 0) - { - r->_mp_size = 0; - return; - } - - limbs = bits / GMP_LIMB_BITS; - shift = bits % GMP_LIMB_BITS; - - rn = un + limbs + (shift > 0); - rp = MPZ_REALLOC (r, rn); - if (shift > 0) - { - mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); - rp[rn-1] = cy; - rn -= (cy == 0); - } - else - mpn_copyd (rp + limbs, u->_mp_d, un); - - mpn_zero (rp, limbs); - - r->_mp_size = (u->_mp_size < 0) ? 
- rn : rn; -} - -void -mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) -{ - mpz_t t; - mpz_init_set_ui (t, v); - mpz_mul (t, u, t); - mpz_add (r, r, t); - mpz_clear (t); -} - -void -mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v) -{ - mpz_t t; - mpz_init_set_ui (t, v); - mpz_mul (t, u, t); - mpz_sub (r, r, t); - mpz_clear (t); -} - -void -mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) -{ - mpz_t t; - mpz_init (t); - mpz_mul (t, u, v); - mpz_add (r, r, t); - mpz_clear (t); -} - -void -mpz_submul (mpz_t r, const mpz_t u, const mpz_t v) -{ - mpz_t t; - mpz_init (t); - mpz_mul (t, u, v); - mpz_sub (r, r, t); - mpz_clear (t); -} - - -/* MPZ division */ -enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; - -/* Allows q or r to be zero. Returns 1 iff remainder is non-zero. */ -static int -mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) -{ - mp_size_t ns, ds, nn, dn, qs; - ns = n->_mp_size; - ds = d->_mp_size; - - if (ds == 0) - gmp_die("mpz_div_qr: Divide by zero."); - - if (ns == 0) - { - if (q) - q->_mp_size = 0; - if (r) - r->_mp_size = 0; - return 0; - } - - nn = GMP_ABS (ns); - dn = GMP_ABS (ds); - - qs = ds ^ ns; - - if (nn < dn) - { - if (mode == GMP_DIV_CEIL && qs >= 0) - { - /* q = 1, r = n - d */ - if (r) - mpz_sub (r, n, d); - if (q) - mpz_set_ui (q, 1); - } - else if (mode == GMP_DIV_FLOOR && qs < 0) - { - /* q = -1, r = n + d */ - if (r) - mpz_add (r, n, d); - if (q) - mpz_set_si (q, -1); - } - else - { - /* q = 0, r = d */ - if (r) - mpz_set (r, n); - if (q) - q->_mp_size = 0; - } - return 1; - } - else - { - mp_ptr np, qp; - mp_size_t qn, rn; - mpz_t tq, tr; - - mpz_init_set (tr, n); - np = tr->_mp_d; - - qn = nn - dn + 1; - - if (q) - { - mpz_init2 (tq, qn * GMP_LIMB_BITS); - qp = tq->_mp_d; - } - else - qp = NULL; - - mpn_div_qr (qp, np, nn, d->_mp_d, dn); - - if (qp) - { - qn -= (qp[qn-1] == 0); - - tq->_mp_size = qs < 0 ? 
-qn : qn; - } - rn = mpn_normalized_size (np, dn); - tr->_mp_size = ns < 0 ? - rn : rn; - - if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) - { - if (q) - mpz_sub_ui (tq, tq, 1); - if (r) - mpz_add (tr, tr, d); - } - else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) - { - if (q) - mpz_add_ui (tq, tq, 1); - if (r) - mpz_sub (tr, tr, d); - } - - if (q) - { - mpz_swap (tq, q); - mpz_clear (tq); - } - if (r) - mpz_swap (tr, r); - - mpz_clear (tr); - - return rn != 0; - } -} - -void -mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, r, n, d, GMP_DIV_CEIL); -} - -void -mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR); -} - -void -mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC); -} - -void -mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL); -} - -void -mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR); -} - -void -mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC); -} - -void -mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL); -} - -void -mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR); -} - -void -mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC); -} - -void -mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? 
GMP_DIV_FLOOR : GMP_DIV_CEIL); -} - -static void -mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode) -{ - mp_size_t un, qn; - mp_size_t limb_cnt; - mp_ptr qp; - int adjust; - - un = u->_mp_size; - if (un == 0) - { - q->_mp_size = 0; - return; - } - limb_cnt = bit_index / GMP_LIMB_BITS; - qn = GMP_ABS (un) - limb_cnt; - bit_index %= GMP_LIMB_BITS; - - if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */ - /* Note: Below, the final indexing at limb_cnt is valid because at - that point we have qn > 0. */ - adjust = (qn <= 0 - || !mpn_zero_p (u->_mp_d, limb_cnt) - || (u->_mp_d[limb_cnt] - & (((mp_limb_t) 1 << bit_index) - 1))); - else - adjust = 0; - - if (qn <= 0) - qn = 0; - else - { - qp = MPZ_REALLOC (q, qn); - - if (bit_index != 0) - { - mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index); - qn -= qp[qn - 1] == 0; - } - else - { - mpn_copyi (qp, u->_mp_d + limb_cnt, qn); - } - } - - q->_mp_size = qn; - - if (adjust) - mpz_add_ui (q, q, 1); - if (un < 0) - mpz_neg (q, q); -} - -static void -mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode) -{ - mp_size_t us, un, rn; - mp_ptr rp; - mp_limb_t mask; - - us = u->_mp_size; - if (us == 0 || bit_index == 0) - { - r->_mp_size = 0; - return; - } - rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; - assert (rn > 0); - - rp = MPZ_REALLOC (r, rn); - un = GMP_ABS (us); - - mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index); - - if (rn > un) - { - /* Quotient (with truncation) is zero, and remainder is - non-zero */ - if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */ - { - /* Have to negate and sign extend. */ - mp_size_t i; - - gmp_assert_nocarry (! 
mpn_neg (rp, u->_mp_d, un)); - for (i = un; i < rn - 1; i++) - rp[i] = GMP_LIMB_MAX; - - rp[rn-1] = mask; - us = -us; - } - else - { - /* Just copy */ - if (r != u) - mpn_copyi (rp, u->_mp_d, un); - - rn = un; - } - } - else - { - if (r != u) - mpn_copyi (rp, u->_mp_d, rn - 1); - - rp[rn-1] = u->_mp_d[rn-1] & mask; - - if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */ - { - /* If r != 0, compute 2^{bit_count} - r. */ - mpn_neg (rp, rp, rn); - - rp[rn-1] &= mask; - - /* us is not used for anything else, so we can modify it - here to indicate flipped sign. */ - us = -us; - } - } - rn = mpn_normalized_size (rp, rn); - r->_mp_size = us < 0 ? -rn : rn; -} - -void -mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL); -} - -void -mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR); -} - -void -mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC); -} - -void -mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL); -} - -void -mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR); -} - -void -mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC); -} - -void -mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d) -{ - gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC)); -} - -int -mpz_divisible_p (const mpz_t n, const mpz_t d) -{ - return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0; -} - -int -mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m) -{ - mpz_t t; - int res; - - /* a == b (mod 0) iff a == b */ - if (mpz_sgn (m) == 0) - return (mpz_cmp (a, b) == 0); - - mpz_init (t); - mpz_sub (t, a, b); - res = mpz_divisible_p (t, m); - mpz_clear (t); - - return res; -} - -static unsigned long -mpz_div_qr_ui (mpz_t q, mpz_t r, - 
const mpz_t n, unsigned long d, enum mpz_div_round_mode mode) -{ - unsigned long ret; - mpz_t rr, dd; - - mpz_init (rr); - mpz_init_set_ui (dd, d); - mpz_div_qr (q, rr, n, dd, mode); - mpz_clear (dd); - ret = mpz_get_ui (rr); - - if (r) - mpz_swap (r, rr); - mpz_clear (rr); - - return ret; -} - -unsigned long -mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL); -} - -unsigned long -mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR); -} - -unsigned long -mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC); -} - -unsigned long -mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL); -} - -unsigned long -mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR); -} - -unsigned long -mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC); -} - -unsigned long -mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL); -} -unsigned long -mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR); -} -unsigned long -mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC); -} - -unsigned long -mpz_cdiv_ui (const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL); -} - -unsigned long -mpz_fdiv_ui (const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR); -} - -unsigned long -mpz_tdiv_ui (const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC); -} - -unsigned long -mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui 
(NULL, r, n, d, GMP_DIV_FLOOR); -} - -void -mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d) -{ - gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC)); -} - -int -mpz_divisible_ui_p (const mpz_t n, unsigned long d) -{ - return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0; -} - - -/* GCD */ -static mp_limb_t -mpn_gcd_11 (mp_limb_t u, mp_limb_t v) -{ - unsigned shift; - - assert ( (u | v) > 0); - - if (u == 0) - return v; - else if (v == 0) - return u; - - gmp_ctz (shift, u | v); - - u >>= shift; - v >>= shift; - - if ( (u & 1) == 0) - MP_LIMB_T_SWAP (u, v); - - while ( (v & 1) == 0) - v >>= 1; - - while (u != v) - { - if (u > v) - { - u -= v; - do - u >>= 1; - while ( (u & 1) == 0); - } - else - { - v -= u; - do - v >>= 1; - while ( (v & 1) == 0); - } - } - return u << shift; -} - -unsigned long -mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v) -{ - mpz_t t; - mpz_init_set_ui(t, v); - mpz_gcd (t, u, t); - if (v > 0) - v = mpz_get_ui (t); - - if (g) - mpz_swap (t, g); - - mpz_clear (t); - - return v; -} - -static mp_bitcnt_t -mpz_make_odd (mpz_t r) -{ - mp_bitcnt_t shift; - - assert (r->_mp_size > 0); - /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ - shift = mpn_scan1 (r->_mp_d, 0); - mpz_tdiv_q_2exp (r, r, shift); - - return shift; -} - -void -mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v) -{ - mpz_t tu, tv; - mp_bitcnt_t uz, vz, gz; - - if (u->_mp_size == 0) - { - mpz_abs (g, v); - return; - } - if (v->_mp_size == 0) - { - mpz_abs (g, u); - return; - } - - mpz_init (tu); - mpz_init (tv); - - mpz_abs (tu, u); - uz = mpz_make_odd (tu); - mpz_abs (tv, v); - vz = mpz_make_odd (tv); - gz = GMP_MIN (uz, vz); - - if (tu->_mp_size < tv->_mp_size) - mpz_swap (tu, tv); - - mpz_tdiv_r (tu, tu, tv); - if (tu->_mp_size == 0) - { - mpz_swap (g, tv); - } - else - for (;;) - { - int c; - - mpz_make_odd (tu); - c = mpz_cmp (tu, tv); - if (c == 0) - { - mpz_swap (g, tu); - break; - } - if (c < 0) - mpz_swap 
(tu, tv); - - if (tv->_mp_size == 1) - { - mp_limb_t *gp; - - mpz_tdiv_r (tu, tu, tv); - gp = MPZ_REALLOC (g, 1); /* gp = mpz_limbs_modify (g, 1); */ - *gp = mpn_gcd_11 (tu->_mp_d[0], tv->_mp_d[0]); - - g->_mp_size = *gp != 0; /* mpz_limbs_finish (g, 1); */ - break; - } - mpz_sub (tu, tu, tv); - } - mpz_clear (tu); - mpz_clear (tv); - mpz_mul_2exp (g, g, gz); -} - -void -mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) -{ - mpz_t tu, tv, s0, s1, t0, t1; - mp_bitcnt_t uz, vz, gz; - mp_bitcnt_t power; - - if (u->_mp_size == 0) - { - /* g = 0 u + sgn(v) v */ - signed long sign = mpz_sgn (v); - mpz_abs (g, v); - if (s) - s->_mp_size = 0; - if (t) - mpz_set_si (t, sign); - return; - } - - if (v->_mp_size == 0) - { - /* g = sgn(u) u + 0 v */ - signed long sign = mpz_sgn (u); - mpz_abs (g, u); - if (s) - mpz_set_si (s, sign); - if (t) - t->_mp_size = 0; - return; - } - - mpz_init (tu); - mpz_init (tv); - mpz_init (s0); - mpz_init (s1); - mpz_init (t0); - mpz_init (t1); - - mpz_abs (tu, u); - uz = mpz_make_odd (tu); - mpz_abs (tv, v); - vz = mpz_make_odd (tv); - gz = GMP_MIN (uz, vz); - - uz -= gz; - vz -= gz; - - /* Cofactors corresponding to odd gcd. gz handled later. */ - if (tu->_mp_size < tv->_mp_size) - { - mpz_swap (tu, tv); - MPZ_SRCPTR_SWAP (u, v); - MPZ_PTR_SWAP (s, t); - MP_BITCNT_T_SWAP (uz, vz); - } - - /* Maintain - * - * u = t0 tu + t1 tv - * v = s0 tu + s1 tv - * - * where u and v denote the inputs with common factors of two - * eliminated, and det (s0, t0; s1, t1) = 2^p. 
Then - * - * 2^p tu = s1 u - t1 v - * 2^p tv = -s0 u + t0 v - */ - - /* After initial division, tu = q tv + tu', we have - * - * u = 2^uz (tu' + q tv) - * v = 2^vz tv - * - * or - * - * t0 = 2^uz, t1 = 2^uz q - * s0 = 0, s1 = 2^vz - */ - - mpz_tdiv_qr (t1, tu, tu, tv); - mpz_mul_2exp (t1, t1, uz); - - mpz_setbit (s1, vz); - power = uz + vz; - - if (tu->_mp_size > 0) - { - mp_bitcnt_t shift; - shift = mpz_make_odd (tu); - mpz_setbit (t0, uz + shift); - power += shift; - - for (;;) - { - int c; - c = mpz_cmp (tu, tv); - if (c == 0) - break; - - if (c < 0) - { - /* tv = tv' + tu - * - * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' - * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ - - mpz_sub (tv, tv, tu); - mpz_add (t0, t0, t1); - mpz_add (s0, s0, s1); - - shift = mpz_make_odd (tv); - mpz_mul_2exp (t1, t1, shift); - mpz_mul_2exp (s1, s1, shift); - } - else - { - mpz_sub (tu, tu, tv); - mpz_add (t1, t0, t1); - mpz_add (s1, s0, s1); - - shift = mpz_make_odd (tu); - mpz_mul_2exp (t0, t0, shift); - mpz_mul_2exp (s0, s0, shift); - } - power += shift; - } - } - else - mpz_setbit (t0, uz); - - /* Now tv = odd part of gcd, and -s0 and t0 are corresponding - cofactors. */ - - mpz_mul_2exp (tv, tv, gz); - mpz_neg (s0, s0); - - /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. 
To - adjust cofactors, we need u / g and v / g */ - - mpz_divexact (s1, v, tv); - mpz_abs (s1, s1); - mpz_divexact (t1, u, tv); - mpz_abs (t1, t1); - - while (power-- > 0) - { - /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ - if (mpz_odd_p (s0) || mpz_odd_p (t0)) - { - mpz_sub (s0, s0, s1); - mpz_add (t0, t0, t1); - } - assert (mpz_even_p (t0) && mpz_even_p (s0)); - mpz_tdiv_q_2exp (s0, s0, 1); - mpz_tdiv_q_2exp (t0, t0, 1); - } - - /* Arrange so that |s| < |u| / 2g */ - mpz_add (s1, s0, s1); - if (mpz_cmpabs (s0, s1) > 0) - { - mpz_swap (s0, s1); - mpz_sub (t0, t0, t1); - } - if (u->_mp_size < 0) - mpz_neg (s0, s0); - if (v->_mp_size < 0) - mpz_neg (t0, t0); - - mpz_swap (g, tv); - if (s) - mpz_swap (s, s0); - if (t) - mpz_swap (t, t0); - - mpz_clear (tu); - mpz_clear (tv); - mpz_clear (s0); - mpz_clear (s1); - mpz_clear (t0); - mpz_clear (t1); -} - -void -mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v) -{ - mpz_t g; - - if (u->_mp_size == 0 || v->_mp_size == 0) - { - r->_mp_size = 0; - return; - } - - mpz_init (g); - - mpz_gcd (g, u, v); - mpz_divexact (g, u, g); - mpz_mul (r, g, v); - - mpz_clear (g); - mpz_abs (r, r); -} - -void -mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v) -{ - if (v == 0 || u->_mp_size == 0) - { - r->_mp_size = 0; - return; - } - - v /= mpz_gcd_ui (NULL, u, v); - mpz_mul_ui (r, u, v); - - mpz_abs (r, r); -} - -int -mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) -{ - mpz_t g, tr; - int invertible; - - if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0) - return 0; - - mpz_init (g); - mpz_init (tr); - - mpz_gcdext (g, tr, NULL, u, m); - invertible = (mpz_cmp_ui (g, 1) == 0); - - if (invertible) - { - if (tr->_mp_size < 0) - { - if (m->_mp_size >= 0) - mpz_add (tr, tr, m); - else - mpz_sub (tr, tr, m); - } - mpz_swap (r, tr); - } - - mpz_clear (g); - mpz_clear (tr); - return invertible; -} - - -/* Higher level operations (sqrt, pow and root) */ - -void -mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e) -{ - unsigned long bit; - 
mpz_t tr; - mpz_init_set_ui (tr, 1); - - bit = GMP_ULONG_HIGHBIT; - do - { - mpz_mul (tr, tr, tr); - if (e & bit) - mpz_mul (tr, tr, b); - bit >>= 1; - } - while (bit > 0); - - mpz_swap (r, tr); - mpz_clear (tr); -} - -void -mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e) -{ - mpz_t b; - - mpz_init_set_ui (b, blimb); - mpz_pow_ui (r, b, e); - mpz_clear (b); -} - -void -mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) -{ - mpz_t tr; - mpz_t base; - mp_size_t en, mn; - mp_srcptr mp; - struct gmp_div_inverse minv; - unsigned shift; - mp_ptr tp = NULL; - - en = GMP_ABS (e->_mp_size); - mn = GMP_ABS (m->_mp_size); - if (mn == 0) - gmp_die ("mpz_powm: Zero modulo."); - - if (en == 0) - { - mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); - return; - } - - mp = m->_mp_d; - mpn_div_qr_invert (&minv, mp, mn); - shift = minv.shift; - - if (shift > 0) - { - /* To avoid shifts, we do all our reductions, except the final - one, using a *normalized* m. */ - minv.shift = 0; - - tp = gmp_alloc_limbs (mn); - gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift)); - mp = tp; - } - - mpz_init (base); - - if (e->_mp_size < 0) - { - if (!mpz_invert (base, b, m)) - gmp_die ("mpz_powm: Negative exponent and non-invertible base."); - } - else - { - mp_size_t bn; - mpz_abs (base, b); - - bn = base->_mp_size; - if (bn >= mn) - { - mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv); - bn = mn; - } - - /* We have reduced the absolute value. Now take care of the - sign. Note that we get zero represented non-canonically as - m. 
*/ - if (b->_mp_size < 0) - { - mp_ptr bp = MPZ_REALLOC (base, mn); - gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn)); - bn = mn; - } - base->_mp_size = mpn_normalized_size (base->_mp_d, bn); - } - mpz_init_set_ui (tr, 1); - - while (--en >= 0) - { - mp_limb_t w = e->_mp_d[en]; - mp_limb_t bit; - - bit = GMP_LIMB_HIGHBIT; - do - { - mpz_mul (tr, tr, tr); - if (w & bit) - mpz_mul (tr, tr, base); - if (tr->_mp_size > mn) - { - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - bit >>= 1; - } - while (bit > 0); - } - - /* Final reduction */ - if (tr->_mp_size >= mn) - { - minv.shift = shift; - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - if (tp) - gmp_free_limbs (tp, mn); - - mpz_swap (r, tr); - mpz_clear (tr); - mpz_clear (base); -} - -void -mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m) -{ - mpz_t e; - - mpz_init_set_ui (e, elimb); - mpz_powm (r, b, e, m); - mpz_clear (e); -} - -/* x=trunc(y^(1/z)), r=y-x^z */ -void -mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z) -{ - int sgn; - mp_bitcnt_t bc; - mpz_t t, u; - - sgn = y->_mp_size < 0; - if ((~z & sgn) != 0) - gmp_die ("mpz_rootrem: Negative argument, with even root."); - if (z == 0) - gmp_die ("mpz_rootrem: Zeroth root."); - - if (mpz_cmpabs_ui (y, 1) <= 0) { - if (x) - mpz_set (x, y); - if (r) - r->_mp_size = 0; - return; - } - - mpz_init (u); - mpz_init (t); - bc = (mpz_sizeinbase (y, 2) - 1) / z + 1; - mpz_setbit (t, bc); - - if (z == 2) /* simplify sqrt loop: z-1 == 1 */ - do { - mpz_swap (u, t); /* u = x */ - mpz_tdiv_q (t, y, u); /* t = y/x */ - mpz_add (t, t, u); /* t = y/x + x */ - mpz_tdiv_q_2exp (t, t, 1); /* x'= (y/x + x)/2 */ - } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */ - else /* z != 2 */ { - mpz_t v; - - mpz_init (v); - if (sgn) - mpz_neg (t, t); - - do { - mpz_swap (u, t); /* u = x */ - mpz_pow_ui 
(t, u, z - 1); /* t = x^(z-1) */ - mpz_tdiv_q (t, y, t); /* t = y/x^(z-1) */ - mpz_mul_ui (v, u, z - 1); /* v = x*(z-1) */ - mpz_add (t, t, v); /* t = y/x^(z-1) + x*(z-1) */ - mpz_tdiv_q_ui (t, t, z); /* x'=(y/x^(z-1) + x*(z-1))/z */ - } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */ - - mpz_clear (v); - } - - if (r) { - mpz_pow_ui (t, u, z); - mpz_sub (r, y, t); - } - if (x) - mpz_swap (x, u); - mpz_clear (u); - mpz_clear (t); -} - -int -mpz_root (mpz_t x, const mpz_t y, unsigned long z) -{ - int res; - mpz_t r; - - mpz_init (r); - mpz_rootrem (x, r, y, z); - res = r->_mp_size == 0; - mpz_clear (r); - - return res; -} - -/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL */ -void -mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u) -{ - mpz_rootrem (s, r, u, 2); -} - -void -mpz_sqrt (mpz_t s, const mpz_t u) -{ - mpz_rootrem (s, NULL, u, 2); -} - -int -mpz_perfect_square_p (const mpz_t u) -{ - if (u->_mp_size <= 0) - return (u->_mp_size == 0); - else - return mpz_root (NULL, u, 2); -} - -int -mpn_perfect_square_p (mp_srcptr p, mp_size_t n) -{ - mpz_t t; - - assert (n > 0); - assert (p [n-1] != 0); - return mpz_root (NULL, mpz_roinit_normal_n (t, p, n), 2); -} - -mp_size_t -mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n) -{ - mpz_t s, r, u; - mp_size_t res; - - assert (n > 0); - assert (p [n-1] != 0); - - mpz_init (r); - mpz_init (s); - mpz_rootrem (s, r, mpz_roinit_normal_n (u, p, n), 2); - - assert (s->_mp_size == (n+1)/2); - mpn_copyd (sp, s->_mp_d, s->_mp_size); - mpz_clear (s); - res = r->_mp_size; - if (rp) - mpn_copyd (rp, r->_mp_d, res); - mpz_clear (r); - return res; -} - -/* Combinatorics */ - -void -mpz_mfac_uiui (mpz_t x, unsigned long n, unsigned long m) -{ - mpz_set_ui (x, n + (n == 0)); - if (m + 1 < 2) return; - while (n > m + 1) - mpz_mul_ui (x, x, n -= m); -} - -void -mpz_2fac_ui (mpz_t x, unsigned long n) -{ - mpz_mfac_uiui (x, n, 2); -} - -void -mpz_fac_ui (mpz_t x, unsigned long n) -{ - mpz_mfac_uiui (x, n, 1); -} - -void 
-mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k) -{ - mpz_t t; - - mpz_set_ui (r, k <= n); - - if (k > (n >> 1)) - k = (k <= n) ? n - k : 0; - - mpz_init (t); - mpz_fac_ui (t, k); - - for (; k > 0; --k) - mpz_mul_ui (r, r, n--); - - mpz_divexact (r, r, t); - mpz_clear (t); -} - - -/* Primality testing */ - -/* Computes Kronecker (a/b) with odd b, a!=0 and GCD(a,b) = 1 */ -/* Adapted from JACOBI_BASE_METHOD==4 in mpn/generic/jacbase.c */ -static int -gmp_jacobi_coprime (mp_limb_t a, mp_limb_t b) -{ - int c, bit = 0; - - assert (b & 1); - assert (a != 0); - /* assert (mpn_gcd_11 (a, b) == 1); */ - - /* Below, we represent a and b shifted right so that the least - significant one bit is implicit. */ - b >>= 1; - - gmp_ctz(c, a); - a >>= 1; - - for (;;) - { - a >>= c; - /* (2/b) = -1 if b = 3 or 5 mod 8 */ - bit ^= c & (b ^ (b >> 1)); - if (a < b) - { - if (a == 0) - return bit & 1 ? -1 : 1; - bit ^= a & b; - a = b - a; - b -= a; - } - else - { - a -= b; - assert (a != 0); - } - - gmp_ctz(c, a); - ++c; - } -} - -static void -gmp_lucas_step_k_2k (mpz_t V, mpz_t Qk, const mpz_t n) -{ - mpz_mod (Qk, Qk, n); - /* V_{2k} <- V_k ^ 2 - 2Q^k */ - mpz_mul (V, V, V); - mpz_submul_ui (V, Qk, 2); - mpz_tdiv_r (V, V, n); - /* Q^{2k} = (Q^k)^2 */ - mpz_mul (Qk, Qk, Qk); -} - -/* Computes V_k, Q^k (mod n) for the Lucas' sequence */ -/* with P=1, Q=Q; k = (n>>b0)|1. */ -/* Requires an odd n > 4; b0 > 0; -2*Q must not overflow a long */ -/* Returns (U_k == 0) and sets V=V_k and Qk=Q^k. 
*/ -static int -gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q, - mp_bitcnt_t b0, const mpz_t n) -{ - mp_bitcnt_t bs; - mpz_t U; - int res; - - assert (b0 > 0); - assert (Q <= - (LONG_MIN / 2)); - assert (Q >= - (LONG_MAX / 2)); - assert (mpz_cmp_ui (n, 4) > 0); - assert (mpz_odd_p (n)); - - mpz_init_set_ui (U, 1); /* U1 = 1 */ - mpz_set_ui (V, 1); /* V1 = 1 */ - mpz_set_si (Qk, Q); - - for (bs = mpz_sizeinbase (n, 2) - 1; --bs >= b0;) - { - /* U_{2k} <- U_k * V_k */ - mpz_mul (U, U, V); - /* V_{2k} <- V_k ^ 2 - 2Q^k */ - /* Q^{2k} = (Q^k)^2 */ - gmp_lucas_step_k_2k (V, Qk, n); - - /* A step k->k+1 is performed if the bit in $n$ is 1 */ - /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but */ - /* should be 1 in $n+1$ (bs == b0) */ - if (b0 == bs || mpz_tstbit (n, bs)) - { - /* Q^{k+1} <- Q^k * Q */ - mpz_mul_si (Qk, Qk, Q); - /* U_{k+1} <- (U_k + V_k) / 2 */ - mpz_swap (U, V); /* Keep in V the old value of U_k */ - mpz_add (U, U, V); - /* We have to compute U/2, so we need an even value, */ - /* equivalent (mod n) */ - if (mpz_odd_p (U)) - mpz_add (U, U, n); - mpz_tdiv_q_2exp (U, U, 1); - /* V_{k+1} <-(D*U_k + V_k) / 2 = - U_{k+1} + (D-1)/2*U_k = U_{k+1} - 2Q*U_k */ - mpz_mul_si (V, V, -2*Q); - mpz_add (V, U, V); - mpz_tdiv_r (V, V, n); - } - mpz_tdiv_r (U, U, n); - } - - res = U->_mp_size == 0; - mpz_clear (U); - return res; -} - -/* Performs strong Lucas' test on x, with parameters suggested */ -/* for the BPSW test. Qk is only passed to recycle a variable. */ -/* Requires GCD (x,6) = 1.*/ -static int -gmp_stronglucas (const mpz_t x, mpz_t Qk) -{ - mp_bitcnt_t b0; - mpz_t V, n; - mp_limb_t maxD, D; /* The absolute value is stored. */ - long Q; - mp_limb_t tl; - - /* Test on the absolute value. */ - mpz_roinit_normal_n (n, x->_mp_d, GMP_ABS (x->_mp_size)); - - assert (mpz_odd_p (n)); - /* assert (mpz_gcd_ui (NULL, n, 6) == 1); */ - if (mpz_root (Qk, n, 2)) - return 0; /* A square is composite. 
*/ - - /* Check Ds up to square root (in case, n is prime) - or avoid overflows */ - maxD = (Qk->_mp_size == 1) ? Qk->_mp_d [0] - 1 : GMP_LIMB_MAX; - - D = 3; - /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,.. */ - /* For those Ds we have (D/n) = (n/|D|) */ - do - { - if (D >= maxD) - return 1 + (D != GMP_LIMB_MAX); /* (1 + ! ~ D) */ - D += 2; - tl = mpz_tdiv_ui (n, D); - if (tl == 0) - return 0; - } - while (gmp_jacobi_coprime (tl, D) == 1); - - mpz_init (V); - - /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */ - b0 = mpn_common_scan (~ n->_mp_d[0], 0, n->_mp_d, n->_mp_size, GMP_LIMB_MAX); - /* b0 = mpz_scan0 (n, 0); */ - - /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */ - Q = (D & 2) ? (long) (D >> 2) + 1 : -(long) (D >> 2); - - if (! gmp_lucas_mod (V, Qk, Q, b0, n)) /* If Ud != 0 */ - while (V->_mp_size != 0 && --b0 != 0) /* while Vk != 0 */ - /* V <- V ^ 2 - 2Q^k */ - /* Q^{2k} = (Q^k)^2 */ - gmp_lucas_step_k_2k (V, Qk, n); - - mpz_clear (V); - return (b0 != 0); -} - -static int -gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y, - const mpz_t q, mp_bitcnt_t k) -{ - assert (k > 0); - - /* Caller must initialize y to the base. */ - mpz_powm (y, y, q, n); - - if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0) - return 1; - - while (--k > 0) - { - mpz_powm_ui (y, y, 2, n); - if (mpz_cmp (y, nm1) == 0) - return 1; - } - return 0; -} - -/* This product is 0xc0cfd797, and fits in 32 bits. */ -#define GMP_PRIME_PRODUCT \ - (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL) - -/* Bit (p+1)/2 is set, for each odd prime <= 61 */ -#define GMP_PRIME_MASK 0xc96996dcUL - -int -mpz_probab_prime_p (const mpz_t n, int reps) -{ - mpz_t nm1; - mpz_t q; - mpz_t y; - mp_bitcnt_t k; - int is_prime; - int j; - - /* Note that we use the absolute value of n only, for compatibility - with the real GMP. */ - if (mpz_even_p (n)) - return (mpz_cmpabs_ui (n, 2) == 0) ? 
2 : 0; - - /* Above test excludes n == 0 */ - assert (n->_mp_size != 0); - - if (mpz_cmpabs_ui (n, 64) < 0) - return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2; - - if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1) - return 0; - - /* All prime factors are >= 31. */ - if (mpz_cmpabs_ui (n, 31*31) < 0) - return 2; - - mpz_init (nm1); - mpz_init (q); - - /* Find q and k, where q is odd and n = 1 + 2**k * q. */ - mpz_abs (nm1, n); - nm1->_mp_d[0] -= 1; - /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ - k = mpn_scan1 (nm1->_mp_d, 0); - mpz_tdiv_q_2exp (q, nm1, k); - - /* BPSW test */ - mpz_init_set_ui (y, 2); - is_prime = gmp_millerrabin (n, nm1, y, q, k) && gmp_stronglucas (n, y); - reps -= 24; /* skip the first 24 repetitions */ - - /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] = - j^2 + j + 41 using Euler's polynomial. We potentially stop early, - if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps > - 30 (a[30] == 971 > 31*31 == 961). */ - - for (j = 0; is_prime & (j < reps); j++) - { - mpz_set_ui (y, (unsigned long) j*j+j+41); - if (mpz_cmp (y, nm1) >= 0) - { - /* Don't try any further bases. This "early" break does not affect - the result for any reasonable reps value (<=5000 was tested) */ - assert (j >= 30); - break; - } - is_prime = gmp_millerrabin (n, nm1, y, q, k); - } - mpz_clear (nm1); - mpz_clear (q); - mpz_clear (y); - - return is_prime; -} - - -/* Logical operations and bit manipulation. */ - -/* Numbers are treated as if represented in two's complement (and - infinitely sign extended). For a negative values we get the two's - complement from -x = ~x + 1, where ~ is bitwise complement. - Negation transforms - - xxxx10...0 - - into - - yyyy10...0 - - where yyyy is the bitwise complement of xxxx. So least significant - bits, up to and including the first one bit, are unchanged, and - the more significant bits are all complemented. 
- - To change a bit from zero to one in a negative number, subtract the - corresponding power of two from the absolute value. This can never - underflow. To change a bit from one to zero, add the corresponding - power of two, and this might overflow. E.g., if x = -001111, the - two's complement is 110001. Clearing the least significant bit, we - get two's complement 110000, and -010000. */ - -int -mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t limb_index; - unsigned shift; - mp_size_t ds; - mp_size_t dn; - mp_limb_t w; - int bit; - - ds = d->_mp_size; - dn = GMP_ABS (ds); - limb_index = bit_index / GMP_LIMB_BITS; - if (limb_index >= dn) - return ds < 0; - - shift = bit_index % GMP_LIMB_BITS; - w = d->_mp_d[limb_index]; - bit = (w >> shift) & 1; - - if (ds < 0) - { - /* d < 0. Check if any of the bits below is set: If so, our bit - must be complemented. */ - if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) - return bit ^ 1; - while (--limb_index >= 0) - if (d->_mp_d[limb_index] > 0) - return bit ^ 1; - } - return bit; -} - -static void -mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_limb_t bit; - mp_ptr dp; - - dn = GMP_ABS (d->_mp_size); - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - if (limb_index >= dn) - { - mp_size_t i; - /* The bit should be set outside of the end of the number. - We have to increase the size of the number. */ - dp = MPZ_REALLOC (d, limb_index + 1); - - dp[limb_index] = bit; - for (i = dn; i < limb_index; i++) - dp[i] = 0; - dn = limb_index + 1; - } - else - { - mp_limb_t cy; - - dp = d->_mp_d; - - cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); - if (cy > 0) - { - dp = MPZ_REALLOC (d, dn + 1); - dp[dn++] = cy; - } - } - - d->_mp_size = (d->_mp_size < 0) ? 
- dn : dn; -} - -static void -mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_ptr dp; - mp_limb_t bit; - - dn = GMP_ABS (d->_mp_size); - dp = d->_mp_d; - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - assert (limb_index < dn); - - gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, - dn - limb_index, bit)); - dn = mpn_normalized_size (dp, dn); - d->_mp_size = (d->_mp_size < 0) ? - dn : dn; -} - -void -mpz_setbit (mpz_t d, mp_bitcnt_t bit_index) -{ - if (!mpz_tstbit (d, bit_index)) - { - if (d->_mp_size >= 0) - mpz_abs_add_bit (d, bit_index); - else - mpz_abs_sub_bit (d, bit_index); - } -} - -void -mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index) -{ - if (mpz_tstbit (d, bit_index)) - { - if (d->_mp_size >= 0) - mpz_abs_sub_bit (d, bit_index); - else - mpz_abs_add_bit (d, bit_index); - } -} - -void -mpz_combit (mpz_t d, mp_bitcnt_t bit_index) -{ - if (mpz_tstbit (d, bit_index) ^ (d->_mp_size < 0)) - mpz_abs_sub_bit (d, bit_index); - else - mpz_abs_add_bit (d, bit_index); -} - -void -mpz_com (mpz_t r, const mpz_t u) -{ - mpz_add_ui (r, u, 1); - mpz_neg (r, r); -} - -void -mpz_and (mpz_t r, const mpz_t u, const mpz_t v) -{ - mp_size_t un, vn, rn, i; - mp_ptr up, vp, rp; - - mp_limb_t ux, vx, rx; - mp_limb_t uc, vc, rc; - mp_limb_t ul, vl, rl; - - un = GMP_ABS (u->_mp_size); - vn = GMP_ABS (v->_mp_size); - if (un < vn) - { - MPZ_SRCPTR_SWAP (u, v); - MP_SIZE_T_SWAP (un, vn); - } - if (vn == 0) - { - r->_mp_size = 0; - return; - } - - uc = u->_mp_size < 0; - vc = v->_mp_size < 0; - rc = uc & vc; - - ux = -uc; - vx = -vc; - rx = -rc; - - /* If the smaller input is positive, higher limbs don't matter. */ - rn = vx ? 
un : vn; - - rp = MPZ_REALLOC (r, rn + (mp_size_t) rc); - - up = u->_mp_d; - vp = v->_mp_d; - - i = 0; - do - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - vl = (vp[i] ^ vx) + vc; - vc = vl < vc; - - rl = ( (ul & vl) ^ rx) + rc; - rc = rl < rc; - rp[i] = rl; - } - while (++i < vn); - assert (vc == 0); - - for (; i < rn; i++) - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - rl = ( (ul & vx) ^ rx) + rc; - rc = rl < rc; - rp[i] = rl; - } - if (rc) - rp[rn++] = rc; - else - rn = mpn_normalized_size (rp, rn); - - r->_mp_size = rx ? -rn : rn; -} - -void -mpz_ior (mpz_t r, const mpz_t u, const mpz_t v) -{ - mp_size_t un, vn, rn, i; - mp_ptr up, vp, rp; - - mp_limb_t ux, vx, rx; - mp_limb_t uc, vc, rc; - mp_limb_t ul, vl, rl; - - un = GMP_ABS (u->_mp_size); - vn = GMP_ABS (v->_mp_size); - if (un < vn) - { - MPZ_SRCPTR_SWAP (u, v); - MP_SIZE_T_SWAP (un, vn); - } - if (vn == 0) - { - mpz_set (r, u); - return; - } - - uc = u->_mp_size < 0; - vc = v->_mp_size < 0; - rc = uc | vc; - - ux = -uc; - vx = -vc; - rx = -rc; - - /* If the smaller input is negative, by sign extension higher limbs - don't matter. */ - rn = vx ? vn : un; - - rp = MPZ_REALLOC (r, rn + (mp_size_t) rc); - - up = u->_mp_d; - vp = v->_mp_d; - - i = 0; - do - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - vl = (vp[i] ^ vx) + vc; - vc = vl < vc; - - rl = ( (ul | vl) ^ rx) + rc; - rc = rl < rc; - rp[i] = rl; - } - while (++i < vn); - assert (vc == 0); - - for (; i < rn; i++) - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - rl = ( (ul | vx) ^ rx) + rc; - rc = rl < rc; - rp[i] = rl; - } - if (rc) - rp[rn++] = rc; - else - rn = mpn_normalized_size (rp, rn); - - r->_mp_size = rx ? 
-rn : rn; -} - -void -mpz_xor (mpz_t r, const mpz_t u, const mpz_t v) -{ - mp_size_t un, vn, i; - mp_ptr up, vp, rp; - - mp_limb_t ux, vx, rx; - mp_limb_t uc, vc, rc; - mp_limb_t ul, vl, rl; - - un = GMP_ABS (u->_mp_size); - vn = GMP_ABS (v->_mp_size); - if (un < vn) - { - MPZ_SRCPTR_SWAP (u, v); - MP_SIZE_T_SWAP (un, vn); - } - if (vn == 0) - { - mpz_set (r, u); - return; - } - - uc = u->_mp_size < 0; - vc = v->_mp_size < 0; - rc = uc ^ vc; - - ux = -uc; - vx = -vc; - rx = -rc; - - rp = MPZ_REALLOC (r, un + (mp_size_t) rc); - - up = u->_mp_d; - vp = v->_mp_d; - - i = 0; - do - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - vl = (vp[i] ^ vx) + vc; - vc = vl < vc; - - rl = (ul ^ vl ^ rx) + rc; - rc = rl < rc; - rp[i] = rl; - } - while (++i < vn); - assert (vc == 0); - - for (; i < un; i++) - { - ul = (up[i] ^ ux) + uc; - uc = ul < uc; - - rl = (ul ^ ux) + rc; - rc = rl < rc; - rp[i] = rl; - } - if (rc) - rp[un++] = rc; - else - un = mpn_normalized_size (rp, un); - - r->_mp_size = rx ? -un : un; -} - -static unsigned -gmp_popcount_limb (mp_limb_t x) -{ - unsigned c; - - /* Do 16 bits at a time, to avoid limb-sized constants. 
*/ - int LOCAL_SHIFT_BITS = 16; - for (c = 0; x > 0;) - { - unsigned w = x - ((x >> 1) & 0x5555); - w = ((w >> 2) & 0x3333) + (w & 0x3333); - w = (w >> 4) + w; - w = ((w >> 8) & 0x000f) + (w & 0x000f); - c += w; - if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) - x >>= LOCAL_SHIFT_BITS; - else - x = 0; - } - return c; -} - -mp_bitcnt_t -mpn_popcount (mp_srcptr p, mp_size_t n) -{ - mp_size_t i; - mp_bitcnt_t c; - - for (c = 0, i = 0; i < n; i++) - c += gmp_popcount_limb (p[i]); - - return c; -} - -mp_bitcnt_t -mpz_popcount (const mpz_t u) -{ - mp_size_t un; - - un = u->_mp_size; - - if (un < 0) - return ~(mp_bitcnt_t) 0; - - return mpn_popcount (u->_mp_d, un); -} - -mp_bitcnt_t -mpz_hamdist (const mpz_t u, const mpz_t v) -{ - mp_size_t un, vn, i; - mp_limb_t uc, vc, ul, vl, comp; - mp_srcptr up, vp; - mp_bitcnt_t c; - - un = u->_mp_size; - vn = v->_mp_size; - - if ( (un ^ vn) < 0) - return ~(mp_bitcnt_t) 0; - - comp = - (uc = vc = (un < 0)); - if (uc) - { - assert (vn < 0); - un = -un; - vn = -vn; - } - - up = u->_mp_d; - vp = v->_mp_d; - - if (un < vn) - MPN_SRCPTR_SWAP (up, un, vp, vn); - - for (i = 0, c = 0; i < vn; i++) - { - ul = (up[i] ^ comp) + uc; - uc = ul < uc; - - vl = (vp[i] ^ comp) + vc; - vc = vl < vc; - - c += gmp_popcount_limb (ul ^ vl); - } - assert (vc == 0); - - for (; i < un; i++) - { - ul = (up[i] ^ comp) + uc; - uc = ul < uc; - - c += gmp_popcount_limb (ul ^ comp); - } - - return c; -} - -mp_bitcnt_t -mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit) -{ - mp_ptr up; - mp_size_t us, un, i; - mp_limb_t limb, ux; - - us = u->_mp_size; - un = GMP_ABS (us); - i = starting_bit / GMP_LIMB_BITS; - - /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit - for u<0. Notice this test picks up any u==0 too. */ - if (i >= un) - return (us >= 0 ? 
~(mp_bitcnt_t) 0 : starting_bit); - - up = u->_mp_d; - ux = 0; - limb = up[i]; - - if (starting_bit != 0) - { - if (us < 0) - { - ux = mpn_zero_p (up, i); - limb = ~ limb + ux; - ux = - (mp_limb_t) (limb >= ux); - } - - /* Mask to 0 all bits before starting_bit, thus ignoring them. */ - limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - } - - return mpn_common_scan (limb, i, up, un, ux); -} - -mp_bitcnt_t -mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit) -{ - mp_ptr up; - mp_size_t us, un, i; - mp_limb_t limb, ux; - - us = u->_mp_size; - ux = - (mp_limb_t) (us >= 0); - un = GMP_ABS (us); - i = starting_bit / GMP_LIMB_BITS; - - /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for - u<0. Notice this test picks up all cases of u==0 too. */ - if (i >= un) - return (ux ? starting_bit : ~(mp_bitcnt_t) 0); - - up = u->_mp_d; - limb = up[i] ^ ux; - - if (ux == 0) - limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */ - - /* Mask all bits before starting_bit, thus ignoring them. */ - limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - - return mpn_common_scan (limb, i, up, un, ux); -} - - -/* MPZ base conversion. */ - -size_t -mpz_sizeinbase (const mpz_t u, int base) -{ - mp_size_t un, tn; - mp_srcptr up; - mp_ptr tp; - mp_bitcnt_t bits; - struct gmp_div_inverse bi; - size_t ndigits; - - assert (base >= 2); - assert (base <= 62); - - un = GMP_ABS (u->_mp_size); - if (un == 0) - return 1; - - up = u->_mp_d; - - bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]); - switch (base) - { - case 2: - return bits; - case 4: - return (bits + 1) / 2; - case 8: - return (bits + 2) / 3; - case 16: - return (bits + 3) / 4; - case 32: - return (bits + 4) / 5; - /* FIXME: Do something more clever for the common case of base - 10. 
*/ - } - - tp = gmp_alloc_limbs (un); - mpn_copyi (tp, up, un); - mpn_div_qr_1_invert (&bi, base); - - tn = un; - ndigits = 0; - do - { - ndigits++; - mpn_div_qr_1_preinv (tp, tp, tn, &bi); - tn -= (tp[tn-1] == 0); - } - while (tn > 0); - - gmp_free_limbs (tp, un); - return ndigits; -} - -char * -mpz_get_str (char *sp, int base, const mpz_t u) -{ - unsigned bits; - const char *digits; - mp_size_t un; - size_t i, sn, osn; - - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - if (base > 1) - { - if (base <= 36) - digits = "0123456789abcdefghijklmnopqrstuvwxyz"; - else if (base > 62) - return NULL; - } - else if (base >= -1) - base = 10; - else - { - base = -base; - if (base > 36) - return NULL; - } - - sn = 1 + mpz_sizeinbase (u, base); - if (!sp) - { - osn = 1 + sn; - sp = (char *) gmp_alloc (osn); - } - else - osn = 0; - un = GMP_ABS (u->_mp_size); - - if (un == 0) - { - sp[0] = '0'; - sn = 1; - goto ret; - } - - i = 0; - - if (u->_mp_size < 0) - sp[i++] = '-'; - - bits = mpn_base_power_of_two_p (base); - - if (bits) - /* Not modified in this case. 
*/ - sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un); - else - { - struct mpn_base_info info; - mp_ptr tp; - - mpn_get_base_info (&info, base); - tp = gmp_alloc_limbs (un); - mpn_copyi (tp, u->_mp_d, un); - - sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un); - gmp_free_limbs (tp, un); - } - - for (; i < sn; i++) - sp[i] = digits[(unsigned char) sp[i]]; - -ret: - sp[sn] = '\0'; - if (osn && osn != sn + 1) - sp = (char*) gmp_realloc (sp, osn, sn + 1); - return sp; -} - -int -mpz_set_str (mpz_t r, const char *sp, int base) -{ - unsigned bits, value_of_a; - mp_size_t rn, alloc; - mp_ptr rp; - size_t dn, sn; - int sign; - unsigned char *dp; - - assert (base == 0 || (base >= 2 && base <= 62)); - - while (isspace( (unsigned char) *sp)) - sp++; - - sign = (*sp == '-'); - sp += sign; - - if (base == 0) - { - if (sp[0] == '0') - { - if (sp[1] == 'x' || sp[1] == 'X') - { - base = 16; - sp += 2; - } - else if (sp[1] == 'b' || sp[1] == 'B') - { - base = 2; - sp += 2; - } - else - base = 8; - } - else - base = 10; - } - - if (!*sp) - { - r->_mp_size = 0; - return -1; - } - sn = strlen(sp); - dp = (unsigned char *) gmp_alloc (sn); - - value_of_a = (base > 36) ? 
36 : 10; - for (dn = 0; *sp; sp++) - { - unsigned digit; - - if (isspace ((unsigned char) *sp)) - continue; - else if (*sp >= '0' && *sp <= '9') - digit = *sp - '0'; - else if (*sp >= 'a' && *sp <= 'z') - digit = *sp - 'a' + value_of_a; - else if (*sp >= 'A' && *sp <= 'Z') - digit = *sp - 'A' + 10; - else - digit = base; /* fail */ - - if (digit >= (unsigned) base) - { - gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - - dp[dn++] = digit; - } - - if (!dn) - { - gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - bits = mpn_base_power_of_two_p (base); - - if (bits > 0) - { - alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_bits (rp, dp, dn, bits); - } - else - { - struct mpn_base_info info; - mpn_get_base_info (&info, base); - alloc = (dn + info.exp - 1) / info.exp; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_other (rp, dp, dn, base, &info); - /* Normalization, needed for all-zero input. */ - assert (rn > 0); - rn -= rp[rn-1] == 0; - } - assert (rn <= alloc); - gmp_free (dp, sn); - - r->_mp_size = sign ? - rn : rn; - - return 0; -} - -int -mpz_init_set_str (mpz_t r, const char *sp, int base) -{ - mpz_init (r); - return mpz_set_str (r, sp, base); -} - -size_t -mpz_out_str (FILE *stream, int base, const mpz_t x) -{ - char *str; - size_t len, n; - - str = mpz_get_str (NULL, base, x); - if (!str) - return 0; - len = strlen (str); - n = fwrite (str, 1, len, stream); - gmp_free (str, len + 1); - return n; -} - - -static int -gmp_detect_endian (void) -{ - static const int i = 2; - const unsigned char *p = (const unsigned char *) &i; - return 1 - *p; -} - -/* Import and export. Does not support nails. */ -void -mpz_import (mpz_t r, size_t count, int order, size_t size, int endian, - size_t nails, const void *src) -{ - const unsigned char *p; - ptrdiff_t word_step; - mp_ptr rp; - mp_size_t rn; - - /* The current (partial) limb. 
*/ - mp_limb_t limb; - /* The number of bytes already copied to this limb (starting from - the low end). */ - size_t bytes; - /* The index where the limb should be stored, when completed. */ - mp_size_t i; - - if (nails != 0) - gmp_die ("mpz_import: Nails not supported."); - - assert (order == 1 || order == -1); - assert (endian >= -1 && endian <= 1); - - if (endian == 0) - endian = gmp_detect_endian (); - - p = (unsigned char *) src; - - word_step = (order != endian) ? 2 * size : 0; - - /* Process bytes from the least significant end, so point p at the - least significant word. */ - if (order == 1) - { - p += size * (count - 1); - word_step = - word_step; - } - - /* And at least significant byte of that word. */ - if (endian == 1) - p += (size - 1); - - rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t); - rp = MPZ_REALLOC (r, rn); - - for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step) - { - size_t j; - for (j = 0; j < size; j++, p -= (ptrdiff_t) endian) - { - limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT); - if (bytes == sizeof(mp_limb_t)) - { - rp[i++] = limb; - bytes = 0; - limb = 0; - } - } - } - assert (i + (bytes > 0) == rn); - if (limb != 0) - rp[i++] = limb; - else - i = mpn_normalized_size (rp, i); - - r->_mp_size = i; -} - -void * -mpz_export (void *r, size_t *countp, int order, size_t size, int endian, - size_t nails, const mpz_t u) -{ - size_t count; - mp_size_t un; - - if (nails != 0) - gmp_die ("mpz_export: Nails not supported."); - - assert (order == 1 || order == -1); - assert (endian >= -1 && endian <= 1); - assert (size > 0 || u->_mp_size == 0); - - un = u->_mp_size; - count = 0; - if (un != 0) - { - size_t k; - unsigned char *p; - ptrdiff_t word_step; - /* The current (partial) limb. */ - mp_limb_t limb; - /* The number of bytes left to do in this limb. */ - size_t bytes; - /* The index where the limb was read. */ - mp_size_t i; - - un = GMP_ABS (un); - - /* Count bytes in top limb. 
*/ - limb = u->_mp_d[un-1]; - assert (limb != 0); - - k = (GMP_LIMB_BITS <= CHAR_BIT); - if (!k) - { - do { - int LOCAL_CHAR_BIT = CHAR_BIT; - k++; limb >>= LOCAL_CHAR_BIT; - } while (limb != 0); - } - /* else limb = 0; */ - - count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size; - - if (!r) - r = gmp_alloc (count * size); - - if (endian == 0) - endian = gmp_detect_endian (); - - p = (unsigned char *) r; - - word_step = (order != endian) ? 2 * size : 0; - - /* Process bytes from the least significant end, so point p at the - least significant word. */ - if (order == 1) - { - p += size * (count - 1); - word_step = - word_step; - } - - /* And at least significant byte of that word. */ - if (endian == 1) - p += (size - 1); - - for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step) - { - size_t j; - for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian) - { - if (sizeof (mp_limb_t) == 1) - { - if (i < un) - *p = u->_mp_d[i++]; - else - *p = 0; - } - else - { - int LOCAL_CHAR_BIT = CHAR_BIT; - if (bytes == 0) - { - if (i < un) - limb = u->_mp_d[i++]; - bytes = sizeof (mp_limb_t); - } - *p = limb; - limb >>= LOCAL_CHAR_BIT; - bytes--; - } - } - } - assert (i == un); - assert (k == count); - } - - if (countp) - *countp = count; - - return r; -} diff --git a/source/gmp.h b/source/gmp.h @@ -1,310 +0,0 @@ -/* mini-gmp, a minimalistic implementation of a GNU GMP subset. - -Copyright 2011-2015, 2017, 2019-2021 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -/* About mini-gmp: This is a minimal implementation of a subset of the - GMP interface. It is intended for inclusion into applications which - have modest bignums needs, as a fallback when the real GMP library - is not installed. - - This file defines the public interface. */ - -#ifndef __MINI_GMP_H__ -#define __MINI_GMP_H__ - -/* For size_t */ -#include <stddef.h> - -#if defined (__cplusplus) -extern "C" { -#endif - -void mp_set_memory_functions (void *(*) (size_t), - void *(*) (void *, size_t, size_t), - void (*) (void *, size_t)); - -void mp_get_memory_functions (void *(**) (size_t), - void *(**) (void *, size_t, size_t), - void (**) (void *, size_t)); - -#ifndef MINI_GMP_LIMB_TYPE -#define MINI_GMP_LIMB_TYPE long -#endif - -typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t; -typedef long mp_size_t; -typedef unsigned long mp_bitcnt_t; - -typedef mp_limb_t *mp_ptr; -typedef const mp_limb_t *mp_srcptr; - -typedef struct -{ - int _mp_alloc; /* Number of *limbs* allocated and pointed - to by the _mp_d field. */ - int _mp_size; /* abs(_mp_size) is the number of limbs the - last field points to. If _mp_size is - negative this is a negative number. */ - mp_limb_t *_mp_d; /* Pointer to the limbs. 
*/ -} __mpz_struct; - -typedef __mpz_struct mpz_t[1]; - -typedef __mpz_struct *mpz_ptr; -typedef const __mpz_struct *mpz_srcptr; - -extern const int mp_bits_per_limb; - -void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t); -void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t); -void mpn_zero (mp_ptr, mp_size_t); - -int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t); -int mpn_zero_p (mp_srcptr, mp_size_t); - -mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); -mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); - -mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); -mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); - -mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); - -mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); -void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); -void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t); -int mpn_perfect_square_p (mp_srcptr, mp_size_t); -mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t); - -mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int); -mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int); - -mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t); -mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t); - -void mpn_com (mp_ptr, mp_srcptr, mp_size_t); -mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t); - -mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t); - -mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t); -#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0) - -size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t); -mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int); - -void mpz_init (mpz_t); -void mpz_init2 (mpz_t, mp_bitcnt_t); -void mpz_clear 
(mpz_t); - -#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0]) -#define mpz_even_p(z) (! mpz_odd_p (z)) - -int mpz_sgn (const mpz_t); -int mpz_cmp_si (const mpz_t, long); -int mpz_cmp_ui (const mpz_t, unsigned long); -int mpz_cmp (const mpz_t, const mpz_t); -int mpz_cmpabs_ui (const mpz_t, unsigned long); -int mpz_cmpabs (const mpz_t, const mpz_t); -int mpz_cmp_d (const mpz_t, double); -int mpz_cmpabs_d (const mpz_t, double); - -void mpz_abs (mpz_t, const mpz_t); -void mpz_neg (mpz_t, const mpz_t); -void mpz_swap (mpz_t, mpz_t); - -void mpz_add_ui (mpz_t, const mpz_t, unsigned long); -void mpz_add (mpz_t, const mpz_t, const mpz_t); -void mpz_sub_ui (mpz_t, const mpz_t, unsigned long); -void mpz_ui_sub (mpz_t, unsigned long, const mpz_t); -void mpz_sub (mpz_t, const mpz_t, const mpz_t); - -void mpz_mul_si (mpz_t, const mpz_t, long int); -void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int); -void mpz_mul (mpz_t, const mpz_t, const mpz_t); -void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_addmul_ui (mpz_t, const mpz_t, unsigned long int); -void mpz_addmul (mpz_t, const mpz_t, const mpz_t); -void mpz_submul_ui (mpz_t, const mpz_t, unsigned long int); -void mpz_submul (mpz_t, const mpz_t, const mpz_t); - -void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t); -void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t); -void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t); -void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t); -void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t); -void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t); -void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t); -void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t); -void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t); - -void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_fdiv_r_2exp 
(mpz_t, const mpz_t, mp_bitcnt_t); -void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t); - -void mpz_mod (mpz_t, const mpz_t, const mpz_t); - -void mpz_divexact (mpz_t, const mpz_t, const mpz_t); - -int mpz_divisible_p (const mpz_t, const mpz_t); -int mpz_congruent_p (const mpz_t, const mpz_t, const mpz_t); - -unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long); -unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long); -unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long); -unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long); -unsigned long mpz_cdiv_ui (const mpz_t, unsigned long); -unsigned long mpz_fdiv_ui (const mpz_t, unsigned long); -unsigned long mpz_tdiv_ui (const mpz_t, unsigned long); - -unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long); - -void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long); - -int mpz_divisible_ui_p (const mpz_t, unsigned long); - -unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long); -void mpz_gcd (mpz_t, const mpz_t, const mpz_t); -void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t); -void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long); -void mpz_lcm (mpz_t, const mpz_t, const mpz_t); -int mpz_invert (mpz_t, const mpz_t, const mpz_t); - -void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t); -void mpz_sqrt (mpz_t, const mpz_t); -int mpz_perfect_square_p (const mpz_t); - -void mpz_pow_ui (mpz_t, const mpz_t, unsigned long); -void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long); -void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t); -void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t); - -void 
mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long); -int mpz_root (mpz_t, const mpz_t, unsigned long); - -void mpz_fac_ui (mpz_t, unsigned long); -void mpz_2fac_ui (mpz_t, unsigned long); -void mpz_mfac_uiui (mpz_t, unsigned long, unsigned long); -void mpz_bin_uiui (mpz_t, unsigned long, unsigned long); - -int mpz_probab_prime_p (const mpz_t, int); - -int mpz_tstbit (const mpz_t, mp_bitcnt_t); -void mpz_setbit (mpz_t, mp_bitcnt_t); -void mpz_clrbit (mpz_t, mp_bitcnt_t); -void mpz_combit (mpz_t, mp_bitcnt_t); - -void mpz_com (mpz_t, const mpz_t); -void mpz_and (mpz_t, const mpz_t, const mpz_t); -void mpz_ior (mpz_t, const mpz_t, const mpz_t); -void mpz_xor (mpz_t, const mpz_t, const mpz_t); - -mp_bitcnt_t mpz_popcount (const mpz_t); -mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t); -mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t); -mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t); - -int mpz_fits_slong_p (const mpz_t); -int mpz_fits_ulong_p (const mpz_t); -int mpz_fits_sint_p (const mpz_t); -int mpz_fits_uint_p (const mpz_t); -int mpz_fits_sshort_p (const mpz_t); -int mpz_fits_ushort_p (const mpz_t); -long int mpz_get_si (const mpz_t); -unsigned long int mpz_get_ui (const mpz_t); -double mpz_get_d (const mpz_t); -size_t mpz_size (const mpz_t); -mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t); - -void mpz_realloc2 (mpz_t, mp_bitcnt_t); -mp_srcptr mpz_limbs_read (mpz_srcptr); -mp_ptr mpz_limbs_modify (mpz_t, mp_size_t); -mp_ptr mpz_limbs_write (mpz_t, mp_size_t); -void mpz_limbs_finish (mpz_t, mp_size_t); -mpz_srcptr mpz_roinit_n (mpz_t, mp_srcptr, mp_size_t); - -#define MPZ_ROINIT_N(xp, xs) {{0, (xs),(xp) }} - -void mpz_set_si (mpz_t, signed long int); -void mpz_set_ui (mpz_t, unsigned long int); -void mpz_set (mpz_t, const mpz_t); -void mpz_set_d (mpz_t, double); - -void mpz_init_set_si (mpz_t, signed long int); -void mpz_init_set_ui (mpz_t, unsigned long int); -void mpz_init_set (mpz_t, const mpz_t); -void mpz_init_set_d (mpz_t, double); - -size_t 
mpz_sizeinbase (const mpz_t, int); -char *mpz_get_str (char *, int, const mpz_t); -int mpz_set_str (mpz_t, const char *, int); -int mpz_init_set_str (mpz_t, const char *, int); - -/* This long list taken from gmp.h. */ -/* For reference, "defined(EOF)" cannot be used here. In g++ 2.95.4, - <iostream> defines EOF but not FILE. */ -#if defined (FILE) \ - || defined (H_STDIO) \ - || defined (_H_STDIO) /* AIX */ \ - || defined (_STDIO_H) /* glibc, Sun, SCO */ \ - || defined (_STDIO_H_) /* BSD, OSF */ \ - || defined (__STDIO_H) /* Borland */ \ - || defined (__STDIO_H__) /* IRIX */ \ - || defined (_STDIO_INCLUDED) /* HPUX */ \ - || defined (__dj_include_stdio_h_) /* DJGPP */ \ - || defined (_FILE_DEFINED) /* Microsoft */ \ - || defined (__STDIO__) /* Apple MPW MrC */ \ - || defined (_MSL_STDIO_H) /* Metrowerks */ \ - || defined (_STDIO_H_INCLUDED) /* QNX4 */ \ - || defined (_ISO_STDIO_ISO_H) /* Sun C++ */ \ - || defined (__STDIO_LOADED) /* VMS */ \ - || defined (_STDIO) /* HPE NonStop */ \ - || defined (__DEFINED_FILE) /* musl */ -size_t mpz_out_str (FILE *, int, const mpz_t); -#endif - -void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *); -void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t); - -#if defined (__cplusplus) -} -#endif -#endif /* __MINI_GMP_H__ */ diff --git a/source/gpu-verify b/source/gpu-verify Binary files differ. 
diff --git a/source/gpu-verify.dSYM/Contents/Info.plist b/source/gpu-verify.dSYM/Contents/Info.plist @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> + <dict> + <key>CFBundleDevelopmentRegion</key> + <string>English</string> + <key>CFBundleIdentifier</key> + <string>com.apple.xcode.dsym.gpu-verify</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundlePackageType</key> + <string>dSYM</string> + <key>CFBundleSignature</key> + <string>????</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> + </dict> +</plist> diff --git a/source/gpu-verify.dSYM/Contents/Resources/DWARF/gpu-verify b/source/gpu-verify.dSYM/Contents/Resources/DWARF/gpu-verify Binary files differ. diff --git a/source/gpu-verify.dSYM/Contents/Resources/Relocations/x86_64/gpu-verify.yml b/source/gpu-verify.dSYM/Contents/Resources/Relocations/x86_64/gpu-verify.yml @@ -0,0 +1,54 @@ +--- +triple: 'x86_64-apple-darwin' +binary-path: gpu-verify +relocations: + - { offsetInCU: 0x26, offset: 0x26, size: 0x8, addend: 0x0, symName: _main, symObjAddr: 0x0, symBinAddr: 0x100001EA0, symSize: 0x32 } + - { offsetInCU: 0x33, offset: 0x33, size: 0x8, addend: 0x0, symName: _main, symObjAddr: 0x0, symBinAddr: 0x100001EA0, symSize: 0x32 } + - { offsetInCU: 0x26, offset: 0xA7, size: 0x8, addend: 0x0, symName: _mpSizeof, symObjAddr: 0x0, symBinAddr: 0x100001EE0, symSize: 0x60 } + - { offsetInCU: 0x95, offset: 0x116, size: 0x8, addend: 0x0, symName: _mpSizeof, symObjAddr: 0x0, symBinAddr: 0x100001EE0, symSize: 0x60 } + - { offsetInCU: 0xCB, offset: 0x14C, size: 0x8, addend: 0x0, symName: _mpSetZero, symObjAddr: 0x60, symBinAddr: 0x100001F40, symSize: 0x50 } + - { offsetInCU: 0x11D, offset: 0x19E, size: 0x8, addend: 0x0, symName: _uiceil, symObjAddr: 0xB0, symBinAddr: 0x100001F90, 
symSize: 0xB0 } + - { offsetInCU: 0x153, offset: 0x1D4, size: 0x8, addend: 0x0, symName: _zeroise_bytes, symObjAddr: 0x160, symBinAddr: 0x100002040, symSize: 0x50 } + - { offsetInCU: 0x1A5, offset: 0x226, size: 0x8, addend: 0x0, symName: _mpConvFromOctets, symObjAddr: 0x1B0, symBinAddr: 0x100002090, symSize: 0x100 } + - { offsetInCU: 0x22F, offset: 0x2B0, size: 0x8, addend: 0x0, symName: _mpConvFromHex, symObjAddr: 0x2B0, symBinAddr: 0x100002190, symSize: 0x2F0 } + - { offsetInCU: 0x2D0, offset: 0x351, size: 0x8, addend: 0x0, symName: _opencl_pairs_from_files, symObjAddr: 0x5A0, symBinAddr: 0x100002480, symSize: 0xBF0 } + - { offsetInCU: 0x586, offset: 0x607, size: 0x8, addend: 0x0, symName: _opencl_prepare, symObjAddr: 0x1190, symBinAddr: 0x100003070, symSize: 0x740 } + - { offsetInCU: 0x673, offset: 0x6F4, size: 0x8, addend: 0x0, symName: _opencl_exec_kernel, symObjAddr: 0x18D0, symBinAddr: 0x1000037B0, symSize: 0xC0 } + - { offsetInCU: 0x6CA, offset: 0x74B, size: 0x8, addend: 0x0, symName: _opencl_results, symObjAddr: 0x1990, symBinAddr: 0x100003870, symSize: 0x1D0 } + - { offsetInCU: 0x730, offset: 0x7B1, size: 0x8, addend: 0x0, symName: _opencl_cleanup, symObjAddr: 0x1B60, symBinAddr: 0x100003A40, symSize: 0x50 } + - { offsetInCU: 0x756, offset: 0x7D7, size: 0x8, addend: 0x0, symName: _opencl_release, symObjAddr: 0x1BB0, symBinAddr: 0x100003A90, symSize: 0x90 } + - { offsetInCU: 0x77C, offset: 0x7FD, size: 0x8, addend: 0x0, symName: _gpuv_init, symObjAddr: 0x1C40, symBinAddr: 0x100003B20, symSize: 0x1D0 } + - { offsetInCU: 0x7CF, offset: 0x850, size: 0x8, addend: 0x0, symName: _gpuv_execute, symObjAddr: 0x1E10, symBinAddr: 0x100003CF0, symSize: 0xD0 } + - { offsetInCU: 0x89B, offset: 0x91C, size: 0x8, addend: 0x0, symName: _gpuv_finish, symObjAddr: 0x1EE0, symBinAddr: 0x100003DC0, symSize: 0x70 } + - { offsetInCU: 0x8E3, offset: 0x964, size: 0x8, addend: 0x0, symName: _gpuv_test, symObjAddr: 0x1F50, symBinAddr: 0x100003E30, symSize: 0x2E8 } + - { offsetInCU: 
0x26, offset: 0xEF4, size: 0x8, addend: 0x0, symName: _ref_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004120, symSize: 0x570 } + - { offsetInCU: 0x50, offset: 0xF1E, size: 0x8, addend: 0x0, symName: _ref_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004120, symSize: 0x570 } + - { offsetInCU: 0x265, offset: 0x1133, size: 0x8, addend: 0x0, symName: _sexp_from_string, symObjAddr: 0x570, symBinAddr: 0x100004690, symSize: 0x70 } + - { offsetInCU: 0x2C5, offset: 0x1193, size: 0x8, addend: 0x0, symName: _sexp_from_string_key, symObjAddr: 0x5E0, symBinAddr: 0x100004700, symSize: 0xB0 } + - { offsetInCU: 0x341, offset: 0x120F, size: 0x8, addend: 0x0, symName: _gpuv_test_ref, symObjAddr: 0x690, symBinAddr: 0x1000047B0, symSize: 0x59C } + - { offsetInCU: 0x26, offset: 0x1626, size: 0x8, addend: 0x0, symName: _montmodmult_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004D50, symSize: 0x630 } + - { offsetInCU: 0x4D, offset: 0x164D, size: 0x8, addend: 0x0, symName: _len_in_bytes, symObjAddr: 0x5DB8, symBinAddr: 0x10000C050, symSize: 0x0 } + - { offsetInCU: 0x6C, offset: 0x166C, size: 0x8, addend: 0x0, symName: _montmodmult_pairs_from_files, symObjAddr: 0x0, symBinAddr: 0x100004D50, symSize: 0x630 } + - { offsetInCU: 0x279, offset: 0x1879, size: 0x8, addend: 0x0, symName: _convert_thread, symObjAddr: 0x630, symBinAddr: 0x100005380, symSize: 0x4B0 } + - { offsetInCU: 0x3E7, offset: 0x19E7, size: 0x8, addend: 0x0, symName: _modmult_opencl_convert, symObjAddr: 0xAE0, symBinAddr: 0x100005830, symSize: 0x490 } + - { offsetInCU: 0x5C2, offset: 0x1BC2, size: 0x8, addend: 0x0, symName: _modmult_opencl_prepare, symObjAddr: 0xF70, symBinAddr: 0x100005CC0, symSize: 0x600 } + - { offsetInCU: 0x6C1, offset: 0x1CC1, size: 0x8, addend: 0x0, symName: _modmult_opencl_exec_kernel, symObjAddr: 0x1570, symBinAddr: 0x1000062C0, symSize: 0xC0 } + - { offsetInCU: 0x718, offset: 0x1D18, size: 0x8, addend: 0x0, symName: _modmult_opencl_results, symObjAddr: 0x1630, symBinAddr: 
0x100006380, symSize: 0x230 } + - { offsetInCU: 0x7F8, offset: 0x1DF8, size: 0x8, addend: 0x0, symName: _modmult_opencl_cleanup, symObjAddr: 0x1860, symBinAddr: 0x1000065B0, symSize: 0x50 } + - { offsetInCU: 0x81E, offset: 0x1E1E, size: 0x8, addend: 0x0, symName: _modmult_opencl_release, symObjAddr: 0x18B0, symBinAddr: 0x100006600, symSize: 0x70 } + - { offsetInCU: 0x844, offset: 0x1E44, size: 0x8, addend: 0x0, symName: _gpuv_init_montg, symObjAddr: 0x1920, symBinAddr: 0x100006670, symSize: 0x1F0 } + - { offsetInCU: 0x897, offset: 0x1E97, size: 0x8, addend: 0x0, symName: _gpuv_execute_montg, symObjAddr: 0x1B10, symBinAddr: 0x100006860, symSize: 0xF0 } + - { offsetInCU: 0x963, offset: 0x1F63, size: 0x8, addend: 0x0, symName: _gpuv_finish_montg, symObjAddr: 0x1C00, symBinAddr: 0x100006950, symSize: 0x90 } + - { offsetInCU: 0x9F6, offset: 0x1FF6, size: 0x8, addend: 0x0, symName: _gpuv_test_montg, symObjAddr: 0x1C90, symBinAddr: 0x1000069E0, symSize: 0x2DE } + - { offsetInCU: 0x26, offset: 0x26C2, size: 0x8, addend: 0x0, symName: _gpuv_estimate_pairs, symObjAddr: 0x0, symBinAddr: 0x100006CC0, symSize: 0x100 } + - { offsetInCU: 0x41, offset: 0x26DD, size: 0x8, addend: 0x0, symName: _select_platform, symObjAddr: 0x190, symBinAddr: 0x100006E50, symSize: 0x2B0 } + - { offsetInCU: 0x66, offset: 0x2702, size: 0x8, addend: 0x0, symName: _select_platform.param, symObjAddr: 0xD50, symBinAddr: 0x10000C000, symSize: 0x0 } + - { offsetInCU: 0x1BE, offset: 0x285A, size: 0x8, addend: 0x0, symName: _gpuv_estimate_pairs, symObjAddr: 0x0, symBinAddr: 0x100006CC0, symSize: 0x100 } + - { offsetInCU: 0x223, offset: 0x28BF, size: 0x8, addend: 0x0, symName: _gpuv_prepare_gcry, symObjAddr: 0x100, symBinAddr: 0x100006DC0, symSize: 0x90 } + - { offsetInCU: 0x238, offset: 0x28D4, size: 0x8, addend: 0x0, symName: _select_device, symObjAddr: 0x440, symBinAddr: 0x100007100, symSize: 0x180 } + - { offsetInCU: 0x2AC, offset: 0x2948, size: 0x8, addend: 0x0, symName: _logger, symObjAddr: 0x5C0, 
symBinAddr: 0x100007280, symSize: 0x40 } + - { offsetInCU: 0x2FA, offset: 0x2996, size: 0x8, addend: 0x0, symName: _create_compute_context, symObjAddr: 0x600, symBinAddr: 0x1000072C0, symSize: 0x70 } + - { offsetInCU: 0x33E, offset: 0x29DA, size: 0x8, addend: 0x0, symName: _create_command_queue, symObjAddr: 0x670, symBinAddr: 0x100007330, symSize: 0x70 } + - { offsetInCU: 0x390, offset: 0x2A2C, size: 0x8, addend: 0x0, symName: _compile_program, symObjAddr: 0x6E0, symBinAddr: 0x1000073A0, symSize: 0x320 } + - { offsetInCU: 0x46E, offset: 0x2B0A, size: 0x8, addend: 0x0, symName: _create_kernel, symObjAddr: 0xA00, symBinAddr: 0x1000076C0, symSize: 0x63 } +... diff --git a/source/gpuv-montg.c b/source/gpuv-montg.c @@ -0,0 +1,657 @@ +// +// gpuv-montg.c +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 16.12.2023. +// + +#include "gpuv-montg.h" +#include <pthread.h> + +static unsigned long len_in_bytes = 0; + +#define ORDER -1 // I think we need to do this, because we want to write it in the 'wrong' way +#define END 0 + +#define BIT_LENGTH (2048) + +#define BITS 64 + +// sizes are always the same 32 units for all, except exp_buf +void montmodmult_pairs_from_files(void *x_buf, void *m_buf, + void *r_1_buf, + void *n_buf, void *ni_buf, + void *msg_buf, + void *exp_buf, + void *s_buf, + unsigned long *pks, unsigned long *n) { + + FILE * pk; + FILE * ms; + + pk = fopen("lib-gpu-generate/publickey.txt", "r"); + ms = fopen("lib-gpu-generate/msgsig.txt", "r"); + + if (pk == NULL || ms == NULL) { + printf("Auxiliary files not found."); + abort(); + } + + fseek (ms, 0, SEEK_END); + long ms_l = ftell(ms); + fseek (ms, 0, SEEK_SET); + char *ms_ptr = malloc(ms_l); + char *ms_ptr_rest = malloc(ms_l); + if (ms_ptr || ms_ptr_rest) + { + fread (ms_ptr, 1, ms_l, ms); + memcpy(ms_ptr_rest, ms_ptr, ms_l); + } + fclose (ms); + + fseek (pk, 0, SEEK_END); + long pk_l = ftell(pk); + fseek (pk, 0, SEEK_SET); + char *pk_ptr = malloc(pk_l); + char *pk_ptr_rest = malloc(pk_l); + if (pk_ptr 
&& pk_ptr_rest) + { + fread (pk_ptr, 1, pk_l, pk); + memcpy(pk_ptr_rest, pk_ptr, pk_l); + } + fclose (pk); + + gpu_register *n_buf_t = n_buf; + gpu_register *msg_buf_t = msg_buf; + gpu_register *s_buf_t = s_buf; + gpu_register *exp_buf_t = exp_buf; + + int len = (BIT_LENGTH / 8) / sizeof(gpu_register); + + char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest); + char* signature = strtok_r(0, "\n", &ms_ptr_rest); + char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest); + char* exponent = strtok_r(0, "\n", &pk_ptr_rest); + char* offs = strtok_r(0, "\n", &pk_ptr_rest); + + int i = 0; + int j = 0; + + mpz_t e,mod,msg,s; + + mpz_init(e); + mpz_init(mod); + mpz_init(msg); + mpz_init(s); + + while (message != NULL && signature != NULL) { + + if (i == 0 || pks[j - 1] < i) { + + mpz_set_str(mod,modulus,16); + mpz_set_str(e,exponent,16); + + pks[j] = atoi(offs); + + modulus = strtok_r(0, "\n", &pk_ptr_rest); + exponent = strtok_r(0, "\n", &pk_ptr_rest); + offs = strtok_r(0, "\n", &pk_ptr_rest); + + mpz_export(&n_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, mod); + mpz_export(&exp_buf_t[j], NULL, ORDER, sizeof(gpu_register), END, 0, e); + + + j++; + + } + + mpz_set_str(msg,message,16); + mpz_set_str(s,signature,16); + + message = strtok_r(0, "\n",&ms_ptr_rest); + signature = strtok_r(0, "\n",&ms_ptr_rest); + + mpz_export(&msg_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, msg); + mpz_export(&s_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, s); + + i++; + } + + mpz_clear(e); + mpz_clear(mod); + mpz_clear(msg); + mpz_clear(s); + + + + *n = i; + +} + +struct thread_args { + + void *x_buf; + void *m_buf; + void *r_1_buf; + void *n_buf; + void *ni_buf; + void *s_buf; + unsigned long *pks; + unsigned long n_start; + unsigned long n_end; +}; + +void *convert_thread(void * vargp) { + + struct thread_args *args = vargp; + + int len = (BIT_LENGTH / 8) / sizeof(gpu_register); + + + int j = 0; + + while(1) { + if (args->pks[j] > args->n_start) + break; 
+ j++; + } + + + //printf("pks[%i] == %i, start at: %i, end at: %i\n",j,args->pks[j], args->n_start, args->n_end); + + gpu_register *s_buf_t = args->s_buf; + gpu_register *n_buf_t = args->n_buf; + + gpu_register *x_buf_t = args->x_buf; + gpu_register *m_buf_t = args->m_buf; + gpu_register *r_1_buf_t = args->r_1_buf; + gpu_register *ni_buf_t = args->ni_buf; + + mpz_t mod, s; + mpz_init(s); + mpz_init(mod); + + mpz_t r, r_1, ni, M, x; + + mpz_init(r); + mpz_init(r_1); + mpz_init(ni); + mpz_init(M); + mpz_init(x); + + mpz_t one; // helper variable + mpz_init_set_si(one,1); + + mpz_set_si(one, 1); + mpz_mul_2exp(r,one,BIT_LENGTH); // r + + int start = (int)args->n_start; + + + + for(int i = start; i < args->n_end; i++) { + + if (i == start || args->pks[j - 1] < i) { + + mpz_import(mod, len, ORDER, sizeof(gpu_register), END, 0, &n_buf_t[len * j]); + + mpz_gcdext(one, r_1, ni, r, mod); // set r_1 and ni + + int sgn = mpz_sgn(r_1); + + mpz_abs(r_1, r_1); + mpz_abs(ni, ni); + + if (sgn == -1) { + mpz_sub(ni, r, ni); + mpz_sub(r_1, mod, r_1); + } + + + mpz_export(&ni_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, ni); + mpz_export(&r_1_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, r_1); + + + + j++; + } + + mpz_import(s, len, ORDER, sizeof(gpu_register), END, 0, &s_buf_t[len * i]); + + // set x (the number to 'square' (multiply by itself)) + mpz_mul(M, s, r); + mpz_mod(M, M, mod); + + mpz_mod(x, r, mod); + + + mpz_export(&x_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, x); + mpz_export(&m_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, M); + + } + + mpz_clear(r); + mpz_clear(r_1); + mpz_clear(ni); + mpz_clear(M); + mpz_clear(x); + + mpz_clear(one); + + return NULL; + +} + +void modmult_opencl_convert(struct gpu_state_alt *state, void *x_buf, void *m_buf, + void *r_1_buf, + void *n_buf, void *ni_buf, + void *msg_buf, + void *exp_buf, + void *s_buf, + unsigned long *pks, unsigned long n) { + + long nr_of_threads = 4; + + #if 
__APPLE__ || unix + + nr_of_threads = sysconf(_SC_NPROCESSORS_ONLN); + + #elif _WIN32 + + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + nr_of_threads = sysinfo.dwNumberOfProcessors; + + #endif + + + struct timespec p1, p2; + + clock_gettime(CLOCK_REALTIME, &p1); + + pthread_t tid[nr_of_threads]; + // not the best, but it is safe + int ids[nr_of_threads]; + + struct thread_args args[nr_of_threads]; + + unsigned long range = n / nr_of_threads; + + for (int i = 0; i < nr_of_threads - 1; i++) + { + args[i].n_start = i * range; + args[i].n_end = (i + 1) * range; + } + + // last one takes the 'rest' + args[nr_of_threads - 1].n_start = (nr_of_threads - 1) * range; + args[nr_of_threads - 1].n_end = n; + + + for (int i = 0; i < nr_of_threads; i++) { + + args[i].m_buf = m_buf; + args[i].n_buf = n_buf; + args[i].ni_buf = ni_buf; + args[i].pks = pks; + args[i].r_1_buf = r_1_buf; + args[i].s_buf = s_buf; + args[i].x_buf = x_buf; + + ids[i] = i; + int err = pthread_create(&tid[i], NULL, convert_thread, (void *)&args[i]); + if ( err != 0 ) + printf("Error creating threads"); + } + + for (int j = 0; j < nr_of_threads; j++) { + int err = pthread_join(tid[j], NULL); + if ( err != 0 ) + printf("Error joining threads"); + } + + clock_gettime(CLOCK_REALTIME, &p2); + + state->p.tv_sec += ( p2.tv_nsec < p1.tv_nsec ? p2.tv_sec - (p1.tv_sec + 1) : p2.tv_sec - p1.tv_sec ); + state->p.tv_nsec += ( p2.tv_nsec < p1.tv_nsec ? 
((999999999 - p1.tv_nsec) + p2.tv_nsec) : (p2.tv_nsec - p1.tv_nsec) ) / 1000; + + +} + +int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state, + void *x_buf, void *m_buf, + void *r_1_buf, + void *n_buf, void *ni_buf, + void *msg_buf, + void *exp_buf, + void *s_buf, + unsigned long *pks, unsigned long n + ) { + + + + modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n); + + + int err; // error code returned from api calls + + unsigned long pk = 0; + + while (1) { + if (pks[pk] + 1 >= n) + break; + pk++; + } + + unsigned long len = len_in_bytes; + + state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); + state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); + state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); + state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); + + state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, n * sizeof(gpu_register), NULL, NULL); + + state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); + + state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 2), NULL, NULL); // plus 2 because the first index will contain how many elements are in the array + + if (!state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem || !state->exp_mem) + { + printf("Error: Failed to allocate device memory!\n"); + exit(1); + } + + // Write our data set into the input array in device memory + // + err = clEnqueueWriteBuffer(info->commands, state->x_mem, CL_TRUE, 0, len, x_buf, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->m_mem, CL_TRUE, 0, len, m_buf, 0, NULL, NULL); + + //err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, len, res, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->n_mem, CL_TRUE, 0, len, n_buf, 0, NULL, NULL); + 
err |= clEnqueueWriteBuffer(info->commands, state->ni_mem, CL_TRUE, 0, len, ni_buf, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0, n * sizeof(gpu_register), exp_buf, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->msg_mem, CL_TRUE, 0, len, msg_buf, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, sizeof(unsigned long), sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long), &pk, 0, NULL, NULL); + + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + + // Set the arguments to our compute kernel + // + err = 0; + err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->x_mem); + err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->m_mem); + err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->n_mem); + err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->ni_mem); + err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->exp_mem); + err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->msg_mem); + err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->pks_indices); + + if (err != CL_SUCCESS) + { + printf("RSA-Error: Failed to set kernel arguments! %d\n", err); + exit(1); + } + + state->total = n; + + + return 0; + +} + +int modmult_opencl_exec_kernel(struct gpu_info *info, struct gpu_state_alt *state) { + + size_t global; + // size_t local = 1; + int err; + + global = state->total; // has to be exactly the amount of signatures we want to verify + + // measure from the first call to the kernel... 
+ if (state->skip) { + state->skip = false; + clock_gettime(CLOCK_REALTIME, &state->t1); + } + + err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + if (err) + { + printf("Error: Failed to execute kernel!\n"); + return EXIT_FAILURE; + } + + return 0; + +} + +unsigned long modmult_opencl_results(struct gpu_info *info, struct gpu_state_alt *state, bool timed, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) { + + if (state->skip) { + // reset skip in the kernel execution + return 0; + } + + int err; + + // Wait for the command commands to get serviced before reading back results + // + err = clFinish(info->commands); + if (err != CL_SUCCESS) + { + printf("Error: Kernel failure! %d\n", err); + exit(1); + } + + unsigned long results = 0; + + // Read back the results from the device to verify the output + err = clEnqueueReadBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + printf("Error: Failed to read output array! %d\n", err); + exit(1); + } + + if (timed) { + // stop measuring after the last command has been read + clock_gettime(CLOCK_REALTIME, &state->t2); + + printf("Preparation (on CPU) took \t%ld.%06ld s\n", state->p.tv_sec, state->p.tv_nsec); + + long sec = ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ); + long nanosec = ( state->t2.tv_nsec < state->t1.tv_nsec ? 
((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000; + + printf("GPU verification took \t%ld.%06ld s\n", sec, nanosec); + + printf("Total time: \t\t%ld.%06ld s\n", sec + state->p.tv_sec, nanosec + state->p.tv_nsec); + + } + + + return state->total - results; + +} + +void modmult_opencl_cleanup(struct gpu_info *info) { + + clReleaseProgram(info->program); + clReleaseKernel(info->kernel); + clReleaseCommandQueue(info->commands); + clReleaseContext(info->context); + +} + +void modmult_opencl_release(struct gpu_state_alt *state) { + + clReleaseMemObject(state->x_mem); + clReleaseMemObject(state->m_mem); + clReleaseMemObject(state->n_mem); + clReleaseMemObject(state->ni_mem); + clReleaseMemObject(state->exp_mem); + + clReleaseMemObject(state->msg_mem); + + clReleaseMemObject(state->pks_indices); +} + +// MARK: for library + +void gpuv_init_montg(struct gpu_info *info, struct gpu_state_alt *state) { + + info->platform = select_platform(0, false); + info->device_id = select_device (info->platform); + info->context = create_compute_context (info->device_id); + info->commands = create_command_queue (info->device_id, info->context); + info->program = compile_program (info->device_id, info->context, "gpuv-montg.cl"); + info->kernel = create_kernel (info->program, "mont"); + + state->result = 0; + state->total = 0; + state->skip = true; + + state->p.tv_nsec = 0; + state->p.tv_sec = 0; + + int err = 0; + + unsigned long results = 0; + + state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + + err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->res_mem); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel arguments! 
%d\n", err); + exit(1); + } +} + +void gpuv_execute_montg(struct gpu_info *info, struct gpu_state_alt *state, + void *x_buf, void *m_buf, + void *r_1_buf, + void *n_buf, void *ni_buf, + void *msg_buf, + void *exp_buf, + void *s_buf, + unsigned long *pks, unsigned long n) { + + modmult_opencl_prepare(info, state, + x_buf, m_buf, + r_1_buf, + n_buf, ni_buf, + msg_buf, exp_buf, s_buf, + pks, n + ); // prepares the next batch of signatures on CPU, naturally blocks until it's finished + + state->result += modmult_opencl_results(info, state, false, msg_buf, r_1_buf, n_buf, s_buf, n); // waits for kernel, if it is not ready yet + modmult_opencl_exec_kernel(info,state); // start kernel (returns immediately) + modmult_opencl_release(state); // release buffers + + + +} + +unsigned long gpuv_finish_montg(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) { + + state->result += modmult_opencl_results(info,state,true,msg_buf,r_1_buf,n_buf, s_buf, n); + + modmult_opencl_cleanup(info); + + unsigned long res = state->result; + + state->result = 0; // reset result + + return res; + +} + + +int gpuv_test_montg(void) { + + unsigned long pairs = gpuv_estimate_pairs(); // returns an estimation of pairs + + unsigned long digit_sz = (BIT_LENGTH / 8) * pairs; + + len_in_bytes = digit_sz; + + unsigned long arr_sz = pairs * sizeof(unsigned long); + + gpu_register *x_buf = malloc(digit_sz); + gpu_register *m_buf = malloc(digit_sz); + gpu_register *r_1_buf = malloc(digit_sz); + gpu_register *n_buf = malloc(digit_sz); + gpu_register *ni_buf = malloc(digit_sz + pairs); + gpu_register *msg_buf = malloc(digit_sz); + gpu_register *s_buf = malloc(digit_sz); + //gpu_register *mod_buf = malloc(digit_sz); + gpu_register *exp_buf = malloc(pairs * sizeof(gpu_register)); + + memset(x_buf, 0, digit_sz); + memset(m_buf, 0, digit_sz); + memset(r_1_buf, 0, digit_sz); + memset(n_buf, 0, digit_sz); + memset(ni_buf, 0, digit_sz); + 
memset(msg_buf, 0, digit_sz); + memset(s_buf, 0, digit_sz); + //memset(mod_buf, 0, digit_sz); + memset(exp_buf, 0, pairs * sizeof(gpu_register)); + + unsigned long *pks = malloc(arr_sz); + + memset(pks, 0, arr_sz); + + printf("READING KEYS...\n"); + + montmodmult_pairs_from_files(x_buf, m_buf, + r_1_buf, + n_buf, ni_buf, + msg_buf, + exp_buf, + + s_buf, + pks, &pairs); + + printf("VERIFYING %lu SIGNATURES...\n", pairs); + + struct gpu_info info; + struct gpu_state_alt state; + + gpuv_init_montg(&info, &state); + + gpuv_execute_montg(&info, &state, + x_buf, m_buf, + r_1_buf, + n_buf, ni_buf, + msg_buf, + exp_buf, + // mod_buf, + s_buf, + pks, pairs); + + unsigned long res = gpuv_finish_montg(&info, &state, msg_buf, r_1_buf, n_buf,s_buf, pairs); + + if (res == pairs) { + printf("VERIFICATION RESULT: OK\n\n"); + } else { + printf("VERIFICATION RESULT: NOT OK!\n"); + printf("At least %lu signatures were invalid.\n\n",state.total - res); + } + + return 0; +} diff --git a/source/gpuv-montg.cl b/source/gpuv-montg.cl @@ -0,0 +1 @@ +../xcode/gpuv-montg.cl +\ No newline at end of file diff --git a/source/gpuv-montg.h b/source/gpuv-montg.h @@ -0,0 +1,33 @@ +// +// gpuv-montg.h +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 16.12.2023. 
+// + +#ifndef gpuv_montg_h +#define gpuv_montg_h + +#include <stdio.h> +#include <stdint.h> +#include <gmp.h> +#include "util.h" + +int gpuv_test_montg(void); + +void gpuv_init_montg(struct gpu_info *info, struct gpu_state_alt *state); + +void gpuv_execute_montg(struct gpu_info *info, struct gpu_state_alt *state, + void *x_buf, void *m_buf, + void *r_1_buf, + void *n_buf, void *ni_buf, + void *msg_buf, + void *exp_buf, + void *s_buf, + unsigned long *pks, unsigned long n); + +unsigned long gpuv_finish_montg(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n); + +typedef uint64_t gpu_register; + +#endif /* gpuv_montg_h */ diff --git a/source/gpuv-ref.c b/source/gpuv-ref.c @@ -0,0 +1,257 @@ +// +// gpuv-ref.c +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 06.12.2023. +// + +#include "gpuv-ref.h" + +void ref_pairs_from_files(char *bases, unsigned long *b_off, + char *exponents, unsigned long *e_off, + char *moduli, unsigned long *m_off, + char *signatures, unsigned long *s_off, + unsigned long *pks, + unsigned long *n) { + + FILE *pk; + FILE *ms; + + pk = fopen("lib-gpu-generate/publickey.txt", "r"); + ms = fopen("lib-gpu-generate/msgsig.txt", "r"); + + if (pk == NULL || ms == NULL) { + printf("Auxiliary files not found."); + abort(); + } + + fseek (ms, 0, SEEK_END); + long ms_l = ftell(ms); + fseek (ms, 0, SEEK_SET); + char *ms_ptr = malloc(ms_l); + char *ms_ptr_rest = malloc(ms_l); + if (ms_ptr || ms_ptr_rest) + { + fread (ms_ptr, 1, ms_l, ms); + memcpy(ms_ptr_rest, ms_ptr, ms_l); + } + fclose (ms); + + fseek (pk, 0, SEEK_END); + long pk_l = ftell(pk); + fseek (pk, 0, SEEK_SET); + char *pk_ptr = malloc(pk_l); + char *pk_ptr_rest = malloc(pk_l); + if (pk_ptr && pk_ptr_rest) + { + fread (pk_ptr, 1, pk_l, pk); + memcpy(pk_ptr_rest, pk_ptr, pk_l); + } + fclose (pk); + + char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest); + char* signature = strtok_r(0, "\n", &ms_ptr_rest); + char* 
modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest); + char* exponent = strtok_r(0, "\n", &pk_ptr_rest); + char* offs = strtok_r(0, "\n", &pk_ptr_rest); + + unsigned long b_offset = 0; + unsigned long e_offset = 0; + unsigned long m_offset = 0; + unsigned long s_offset = 0; + + int i = 0; + int j = 0; + + while (modulus != NULL && exponent != NULL && offs != NULL) { + + unsigned long n_buf_len = strlen(modulus); + unsigned long e_buf_len = strlen(exponent); + + memcpy(&moduli[m_offset], modulus, n_buf_len); + memcpy(&exponents[e_offset], exponent, e_buf_len); + + m_off[i] = m_offset; + e_off[i] = e_offset; + + m_offset += n_buf_len + 1; + e_offset += e_buf_len + 1; + + pks[i] = atoi(offs); + + modulus = strtok_r(0, "\n", &pk_ptr_rest); + exponent = strtok_r(0, "\n", &pk_ptr_rest); + offs = strtok_r(0, "\n", &pk_ptr_rest); + + i++; + } + + while (message != NULL && signature != NULL) { + + unsigned long m_buf_len = strlen(message); + unsigned long s_buf_len = strlen(signature); + + memcpy(&bases[b_offset], message, m_buf_len); + memcpy(&signatures[s_offset], signature, s_buf_len); + + b_off[j] = b_offset; + s_off[j] = s_offset; + + b_offset += m_buf_len + 1; + s_offset += s_buf_len + 1; + + message = strtok_r(0, "\n",&ms_ptr_rest); + signature = strtok_r(0, "\n",&ms_ptr_rest); + + j++; + + } + + *n = j; +} + +gcry_sexp_t sexp_from_string(char* str, const char *format) { + + gcry_sexp_t sexp; + + gcry_mpi_t mpi = gcry_mpi_new((int)strlen(str) * 8); + //size_t scanned = 0; + gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, str, 0, NULL); + + size_t errOff = 0; + gcry_sexp_build(&sexp,&errOff,format,mpi); + + return sexp; +} + +gcry_sexp_t sexp_from_string_key(char* str_1, char* str_2, const char *format) { + + gcry_sexp_t sexp; + + gcry_mpi_t mpi_1 = gcry_mpi_new((int)strlen(str_1) * 8); + //size_t scanned = 0; + gcry_mpi_scan(&mpi_1, GCRYMPI_FMT_HEX, str_1, 0, NULL); + + gcry_mpi_t mpi_2 = gcry_mpi_new((int)strlen(str_2) * 8); + //size_t scanned = 0; + gcry_mpi_scan(&mpi_2, 
GCRYMPI_FMT_HEX, str_2, 0, NULL); + + size_t errOff = 0; + gcry_sexp_build(&sexp,&errOff,format,mpi_1,mpi_2); + + return sexp; +} + +int gpuv_test_ref(void) { + + unsigned long pairs = gpuv_estimate_pairs(); + + unsigned long str_sz = (2048) * pairs; + + + char *b = malloc(str_sz); + char *e = malloc(str_sz); + char *m = malloc(str_sz); + char *s = malloc(str_sz); + + unsigned long *b_off = malloc(str_sz); + unsigned long *e_off = malloc(str_sz); + unsigned long *m_off = malloc(str_sz); + unsigned long *s_off = malloc(str_sz); + + memset(b, 0, str_sz); + memset(e, 0, str_sz); + memset(m, 0, str_sz); + memset(s, 0, str_sz); + + memset(b_off, 0, str_sz); + memset(e_off, 0, str_sz); + memset(m_off, 0, str_sz); + memset(s_off, 0, str_sz); + + unsigned long *pks = malloc(str_sz); + memset(pks, 0, str_sz); + + ref_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks, + &pairs); + + unsigned long pk = 0; + + while (1) { + if (pks[pk] + 1 == pairs) + break; + pk++; + } + + + + gcry_sexp_t *m_sexps = malloc(pairs * sizeof(gcry_sexp_t)); + gcry_sexp_t *s_sexps = malloc(pairs * sizeof(gcry_sexp_t)); + gcry_sexp_t *key_sexps = malloc((pk + 1) * sizeof(gcry_sexp_t)); + + for (int i = 0; i < pairs; i++) { + + m_sexps[i] = sexp_from_string(&b[b_off[i]], "(data (flags raw) (value %m))"); // message format (for comparison) + + s_sexps[i] = sexp_from_string(&s[s_off[i]], "(sig-val (rsa (s %m)))"); // signature format + } + + + for (int i = 0; i <= pk; i++) { + + key_sexps[i] = sexp_from_string_key(&m[m_off[i]], &e[e_off[i]], "(public-key (rsa (n %m) (e %m)))" ); // pub key data + + } + + unsigned long result = 0; + + struct timespec t1, t2; + + printf("VERIFYING %lu SIGNATURES...\n", pairs); + + clock_gettime(CLOCK_REALTIME, &t1); + + pk = 0; // reuse pk + + for (int i = 0; i < pairs; i++) { + + while (1) { + if (pks[pk] >= i) + break; + pk++; + } + + if ( gcry_pk_verify(s_sexps[i], m_sexps[i], key_sexps[pk]) == 0 ) + result += 1; + + } + + clock_gettime(CLOCK_REALTIME, 
&t2); + + printf("CPU (Reference) verification took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((999999999 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000); + + if (result == pairs) { + printf("VERIFICATION RESULT: %lu - OK\n\n",result); + } else { + printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result); + } + + + free(b); + free(e); + free(m); + free(s); + + free(b_off); + free(e_off); + free(m_off); + free(s_off); + + free(pks); + + free(m_sexps); + free(s_sexps); + free(key_sexps); + + return result == pairs ? 1 : 0; +} diff --git a/source/gpuv-ref.h b/source/gpuv-ref.h @@ -0,0 +1,16 @@ +// +// gpuv-ref.h +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 06.12.2023. +// + + +#ifndef gpuv_ref_h +#define gpuv_ref_h + +#include "util.h" + +int gpuv_test_ref(void); + +#endif /* gpuv-ref_h */ diff --git a/source/gpuv.c b/source/gpuv.c @@ -0,0 +1,622 @@ +/* + * lib-gpu-verify + * + * This software contains code derived from or inspired by the BigDigit library, + * <http://www.di-mgt.com.au/bigdigits.html> + * which is distributed under the Mozilla Public License, version 2.0. + * + * The original code and modifications made to it are subject to the terms and + * conditions of the Mozilla Public License, version 2.0. A copy of the + * MPL license can be obtained at + * https://www.mozilla.org/en-US/MPL/2.0/. + * + * Changes and additions to the original code are as follows: + * - Copied some functions of the BigDigit library into this file, to convert strings read from files to BigDigit type numbers. + * + * Contributors: + * - Cedric Zwahlen cedric.zwahlen@bfh.ch + * + * Please note that this software is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Mozilla Public License, version 2.0, for the specific language + * governing permissions and limitations under the License. 
+ */ + +#include "gpuv.h" + +#define BITS_PER_DIGIT 32 + +#define MAX_ALLOC_SIZE 256 + +#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8) + +typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half? + +size_t mpSizeof(const DIGIT_T a[], size_t ndigits) +{ + while(ndigits--) + { + if (a[ndigits] != 0) + return (++ndigits); + } + return 0; +} + +volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits) +{ /* Sets a = 0 */ + + /* Prevent optimiser ignoring this */ + volatile DIGIT_T optdummy; + volatile DIGIT_T *p = a; + + while (ndigits--) + a[ndigits] = 0; + + optdummy = *p; + return optdummy; +} + +size_t uiceil(double x) +/* Returns ceil(x) as a non-negative integer or 0 if x < 0 */ +{ + size_t c; + + if (x < 0) return 0; + c = (size_t)x; + if ((x - c) > 0.0) + c++; + + return c; +} + +volatile uint8_t zeroise_bytes(volatile void *v, size_t n) +{ /* Zeroise byte array b and make sure optimiser does not ignore this */ + volatile uint8_t optdummy; + volatile uint8_t *b = (uint8_t*)v; + while(n--) + b[n] = 0; + optdummy = *b; + return optdummy; +} + +size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes) +/* Converts nbytes octets into big digit a of max size ndigits + Returns actual number of digits set (may be larger than mpSizeof) +*/ +{ + size_t i; + int j, k; + DIGIT_T t; + + mpSetZero(a, ndigits); + //memset(a, 0, ndigits); + + /* Read in octets, least significant first */ + /* i counts into big_d, j along c, and k is # bits to shift */ + for (i = 0, j = (int)nbytes - 1; i < ndigits && j >= 0; i++) + { + t = 0; + for (k = 0; j >= 0 && k < BITS_PER_DIGIT; j--, k += 8) + t |= ((DIGIT_T)c[j]) << k; + a[i] = t; + } + + return i; +} + +size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s) +/* Convert a string in hexadecimal format to a big digit. + Return actual number of digits set (may be larger than mpSizeof). + Just ignores invalid characters in s. 
+*/ +{ + + uint8_t newdigits[MAX_ALLOC_SIZE*2]; + + size_t newlen; + size_t n; + unsigned long t; + size_t i, j; + + mpSetZero(a, ndigits); + //memset(&a, 0, ndigits); + + /* Create some temp storage for int values */ + n = strlen(s); + if (0 == n) return 0; + newlen = uiceil(n * 0.5); /* log(16)/log(256)=0.5 */ + //ALLOC_BYTES(newdigits, newlen); + memset(&newdigits, 0, newlen); + + /* Work through zero-terminated string */ + for (i = 0; s[i]; i++) + { + t = s[i]; + if ((t >= '0') && (t <= '9')) t = (t - '0'); + else if ((t >= 'a') && (t <= 'f')) t = (t - 'a' + 10); + else if ((t >= 'A') && (t <= 'F')) t = (t - 'A' + 10); + else continue; + for (j = newlen; j > 0; j--) + { + t += (unsigned long)newdigits[j-1] << 4; + newdigits[j-1] = (unsigned char)(t & 0xFF); + t >>= 8; + } + } + + /* Convert bytes to big digits */ + n = mpConvFromOctets(a, ndigits, newdigits, newlen); + + memset(&newdigits, 0, newlen); + + return n; +} + +// MARK: OPENCL CODE + +void opencl_pairs_from_files(void *bases, unsigned long *b_len, + void *exponents, unsigned long *e_len, + void *moduli, unsigned long *m_len, + void *signatures, unsigned long *s_len, + unsigned long *pks, + unsigned long *n + ) { + + FILE *pk; + FILE *ms; + + pk = fopen("lib-gpu-generate/publickey.txt", "r"); + ms = fopen("lib-gpu-generate/msgsig.txt", "r"); + + if (pk == NULL || ms == NULL) { + printf("Auxiliary files not found."); + abort(); + } + + fseek (ms, 0, SEEK_END); + long ms_l = ftell(ms); + fseek (ms, 0, SEEK_SET); + char *ms_ptr = malloc(ms_l); + char *ms_ptr_rest = malloc(ms_l); + if (ms_ptr || ms_ptr_rest) + { + fread (ms_ptr, 1, ms_l, ms); + memcpy(ms_ptr_rest, ms_ptr, ms_l); + } + fclose (ms); + + fseek (pk, 0, SEEK_END); + long pk_l = ftell(pk); + fseek (pk, 0, SEEK_SET); + char *pk_ptr = malloc(pk_l); + char *pk_ptr_rest = malloc(pk_l); + if (pk_ptr && pk_ptr_rest) + { + fread (pk_ptr, 1, pk_l, pk); + memcpy(pk_ptr_rest, pk_ptr, pk_l); + } + fclose (pk); + + int i = 0; + int j = 0; + + DIGIT_T 
*bases_t = bases; + DIGIT_T *exponents_t = exponents; + DIGIT_T *moduli_t = moduli; + DIGIT_T *signatures_t = signatures; + + int sz = 2048 / sizeof(DIGIT_T); + + char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest); + char* signature = strtok_r(0, "\n", &ms_ptr_rest); + char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest); + char* exponent = strtok_r(0, "\n", &pk_ptr_rest); + char* offs = strtok_r(0, "\n", &pk_ptr_rest); + + while (modulus != NULL && exponent != NULL && offs != NULL) { + + pks[i] = atoi(offs); + + unsigned long n_buf_len = strlen(modulus); + unsigned long e_buf_len = strlen(exponent); + + DIGIT_T exponent_dgt [sz*2]; + DIGIT_T modulus_dgt [sz*2]; + + mpSetZero(exponent_dgt, sz*2); + mpSetZero(modulus_dgt, sz*2); + + mpConvFromHex(exponent_dgt, e_buf_len, exponent); + mpConvFromHex(modulus_dgt, n_buf_len, modulus); + + unsigned long max_len = 64; // hardcoded for 2048 bit RSA + + e_len[i] = (i == 0 ? 0 : e_len[i - 1]) + mpSizeof(exponent_dgt, sz*2); + m_len[i] = (i == 0 ? 0 : m_len[i - 1]) + max_len; + + memcpy(&moduli_t[i == 0 ? 0 : (m_len[i - 1])], modulus_dgt, ( m_len[i] - (i == 0 ? 0 : m_len[i - 1]) ) * sizeof(DIGIT_T)); + memcpy(&exponents_t[i == 0 ? 0 : (e_len[i - 1])], exponent_dgt, ( e_len[i] - (i == 0 ? 0 : e_len[i - 1]) ) * sizeof(DIGIT_T)); + + + modulus = strtok_r(0, "\n", &pk_ptr_rest); + exponent = strtok_r(0, "\n", &pk_ptr_rest); + offs = strtok_r(0, "\n", &pk_ptr_rest); + + + i++; + } + + + + while (message != NULL && signature != NULL) { + + unsigned long m_buf_len = strlen(message); + unsigned long s_buf_len = strlen(signature); + + DIGIT_T base_dgt [sz*2]; // temp storage, large enough + DIGIT_T signature_dgt [sz*2]; + + mpSetZero(base_dgt, sz*2); + mpSetZero(signature_dgt, sz*2); + + mpConvFromHex(base_dgt, m_buf_len, message); + mpConvFromHex(signature_dgt, s_buf_len, signature); + + unsigned long max_len = 64; // the maximum of DIGIT_T types we need + + b_len[j] = (j == 0 ? 0 : b_len[j - 1]) + max_len; + s_len[j] = (j == 0 ? 
0 : s_len[j - 1]) + max_len; + + memcpy(&bases_t[j == 0 ? 0 : (b_len[j - 1])], base_dgt, ( b_len[j] - (j == 0 ? 0 : b_len[j - 1]) ) * sizeof(DIGIT_T)); + memcpy(&signatures_t[j == 0 ? 0 : (s_len[j - 1])], signature_dgt, ( s_len[j] - (j == 0 ? 0 : s_len[j - 1]) ) * sizeof(DIGIT_T)); + + message = strtok_r(0, "\n",&ms_ptr_rest); + signature = strtok_r(0, "\n",&ms_ptr_rest); + + j++; + + } + + *n = j; + +} + +int opencl_prepare(struct gpu_info *info, struct gpu_state *state, + void *bases, unsigned long *b_len, + void *exponents, unsigned long *e_len, + void *moduli, unsigned long *m_len, + void *signatures, unsigned long *s_len, + const unsigned long *pks, + const unsigned long n) { + + int err; // error code returned from api calls + + unsigned long pk = 0; + + while (1) { + if (pks[pk] + 1 >= n) + break; + pk++; + } + + + state->mod_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * m_len[pk], NULL, NULL); + state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * e_len[pk], NULL, NULL); + + state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * s_len[n-1], NULL, NULL); + state->comp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(DIGIT_T) * b_len[n-1], NULL, NULL); // the base, to compare whether we get the same signature + + state->mod_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL); + state->exp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL); + + state->sig_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); + state->comp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); + + state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL); + + + + + if (!state->sig_mem || !state->exp_mem || !state->mod_mem || !state->comp_mem || 
!state->invalid ) + { + printf("Error: Failed to allocate device memory!\n"); + exit(1); + } + + // Write our data set into the input array in device memory + // + err = clEnqueueWriteBuffer(info->commands, state->sig_mem, CL_TRUE, 0, sizeof(DIGIT_T) * s_len[n-1], signatures, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0,sizeof(DIGIT_T) * e_len[pk], exponents, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->exp_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), e_len, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->mod_mem, CL_TRUE, 0, sizeof(DIGIT_T) * m_len[pk], moduli, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->mod_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), m_len, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->comp_mem, CL_TRUE, 0, sizeof(DIGIT_T) * b_len[n-1], bases, 0, NULL, NULL); + err |= clEnqueueWriteBuffer(info->commands, state->comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + + // Set the arguments to our compute kernel + // + err = 0; + err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->sig_mem); + err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->sig_len); + err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->exp_mem); + err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->exp_len); + err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->mod_mem); + err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->mod_len); + err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), 
&state->comp_mem); + err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->comp_len); + err |= clSetKernelArg(info->kernel, 9, sizeof(cl_mem), &state->pks_indices); + err |= clSetKernelArg(info->kernel, 10, sizeof(unsigned long), &n); + + if (err != CL_SUCCESS) + { + printf("RSA-Error: Failed to set kernel arguments! %d\n", err); + exit(1); + } + + state->total = n; + + + return 0; + +} + +int opencl_exec_kernel(struct gpu_info *info, struct gpu_state *state) { + + size_t global; + int err; + + global = state->total; + + // measure from the first call to the kernel... + if (state->skip) { + state->skip = false; + clock_gettime(CLOCK_REALTIME, &state->t1); + } + + err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + if (err) + { + printf("Error: Failed to execute kernel!\n"); + return EXIT_FAILURE; + } + + return 0; + +} + + +unsigned long opencl_results(struct gpu_info *info, struct gpu_state *state, bool timed) { + + if (state->skip) { + // reset skip in the kernel execution + return 0; + } + + int err; + + unsigned long failed_signatures = 0; + + + // Wait for the command commands to get serviced before reading back results + // + err = clFinish(info->commands); + if (err != CL_SUCCESS) + { + printf("Error: Kernel failure! %d\n", err); + exit(1); + } + + // Read back the results from the device to verify the output + // + err = clEnqueueReadBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + printf("Error: Failed to read output array! %d\n", err); + exit(1); + } + + + if (timed) { + // stop measuring after the last command has been read + clock_gettime(CLOCK_REALTIME, &state->t2); + + printf("GPU verification took %ld.%06ld s\n", ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ), ( state->t2.tv_nsec < state->t1.tv_nsec ? 
((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000); + + } + + + return state->total - failed_signatures; + +} + +void opencl_cleanup(struct gpu_info *info) { + + clReleaseProgram(info->program); + clReleaseKernel(info->kernel); + clReleaseCommandQueue(info->commands); + clReleaseContext(info->context); + +} + +void opencl_release(struct gpu_state *state) { + + clReleaseMemObject(state->comp_mem); + clReleaseMemObject(state->exp_mem); + clReleaseMemObject(state->mod_mem); + clReleaseMemObject(state->sig_mem); + + clReleaseMemObject(state->comp_len); + clReleaseMemObject(state->exp_len); + clReleaseMemObject(state->mod_len); + clReleaseMemObject(state->sig_len); + + clReleaseMemObject(state->pks_indices); + + + +} + +// MARK: for library + +void gpuv_init(struct gpu_info *info, struct gpu_state *state) { + + info->platform = select_platform(0, false); + info->device_id = select_device (info->platform); + info->context = create_compute_context (info->device_id); + info->commands = create_command_queue (info->device_id, info->context); + info->program = compile_program (info->device_id, info->context, "gpuv.cl"); + info->kernel = create_kernel (info->program, "several"); + + state->result = 0; + state->total = 0; + state->skip = true; + + + int err = 0; + + int failed_signatures = 0; + + state->invalid = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); + + err |= clEnqueueWriteBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + + err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->invalid); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel arguments! 
%d\n", err); + exit(1); + } +} + +void gpuv_execute(struct gpu_info *info, + struct gpu_state *state, + void *bases, unsigned long *b_len, + void *exponents, unsigned long *e_len, + void *moduli, unsigned long *m_len, + void *signatures, unsigned long *s_len, + const unsigned long *pks, + const unsigned long n) { + + opencl_prepare(info, state, bases, b_len, + exponents, e_len, + moduli, m_len, + signatures, s_len, + pks, n); // prepares the next batch of signatures on CPU, naturally blocks until it's finished + state->result += opencl_results(info, state, false); // waits for kernel, if it is not ready yet + opencl_exec_kernel(info,state); // start kernel (returns immediately) + opencl_release(state); // release buffers + + + +} + +unsigned long gpuv_finish(struct gpu_info *info, struct gpu_state *state) { + + state->result += opencl_results(info,state,true); + + opencl_cleanup(info); + clReleaseMemObject(state->invalid); + + unsigned long res = state->result; + + state->result = 0; // reset result + + return res; + +} + +// MARK: function to know how much storage the gpu has to split data + + + +//size_t retSize_3 = sizeof(cl_ulong); +//cl_ulong max_stor = 0; +//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, 0, NULL, &retSize_3); +//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, retSize_3, &max_stor, &retSize_3); + +//printf("max memory: %llu\n", max_stor); + + +int gpuv_test(void) { + + unsigned long pairs = gpuv_estimate_pairs(); // returns an estimation of pairs + + unsigned long digit_sz = 64 * pairs * sizeof(DIGIT_T); + unsigned long arr_sz = pairs * sizeof(unsigned long); + + DIGIT_T *q = malloc(digit_sz); + DIGIT_T *r = malloc(digit_sz); + DIGIT_T *s = malloc(digit_sz); + DIGIT_T *t = malloc(digit_sz); + + memset(q, 0, digit_sz); + memset(r, 0, digit_sz); + memset(s, 0, digit_sz); + memset(t, 0, digit_sz); + + unsigned long *u = malloc(arr_sz); + unsigned long *v = malloc(arr_sz); + unsigned long *w = malloc(arr_sz); + unsigned long *x 
= malloc(arr_sz); + + unsigned long *pks = malloc(arr_sz); + + memset(u, 0, arr_sz); + memset(v, 0, arr_sz); + memset(w, 0, arr_sz); + memset(x, 0, arr_sz); + memset(pks, 0, arr_sz); + + opencl_pairs_from_files(q, u, + r, v, + s, w, + t, x, pks, &pairs); // this returns the actual amount of pairs + + printf("VERIFYING %lu SIGNATURES...\n", pairs); + + struct gpu_info info; + struct gpu_state state; + + gpuv_init(&info, &state); + + gpuv_execute(&info, &state, q, u, r, v, s, w, t, x, pks, pairs); + + unsigned long res = gpuv_finish(&info, &state); + + if (res == pairs) { + printf("VERIFICATION RESULT: %lu - OK\n\n",res); + } else { + printf("VERIFICATION RESULT: %lu - NOT OK\n\n",res); + } + + + free(q); + free(r); + free(s); + free(t); + + free(u); + free(v); + free(w); + free(x); + + free(pks); + + return 0; +} + diff --git a/source/gpuv.cl b/source/gpuv.cl @@ -0,0 +1 @@ +../xcode/gpuv.cl +\ No newline at end of file diff --git a/source/gpuv.h b/source/gpuv.h @@ -0,0 +1,30 @@ +// +// gpuv.h +// lib-gpu-verify +// +// Created by Cedric Zwahlen on 28.09.2023. 
+// + +#ifndef gpuv_h +#define gpuv_h + +#include "util.h" +#include <stdint.h> + +int gpuv_test(void); + + +void gpuv_init(struct gpu_info *info, struct gpu_state *state); + +void gpuv_execute(struct gpu_info *info, + struct gpu_state *state, + void *bases, unsigned long *b_len, + void *exponents, unsigned long *e_len, + void *moduli, unsigned long *m_len, + void *signatures, unsigned long *s_len, + const unsigned long *pks, + const unsigned long n); + +unsigned long gpuv_finish(struct gpu_info *info, struct gpu_state *state); + +#endif /* gpuv_h */ diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c @@ -6,11 +6,10 @@ // -#include "rsa-test.h" +#include "gpuv.h" -#include "reference-test.h" -#include "montgomery-test.h" -#include "montmodmult.h" +#include "gpuv-ref.h" +#include "gpuv-montg.h" int main(int argc, char** argv) @@ -18,13 +17,13 @@ int main(int argc, char** argv) - setup_gcry(); + gpuv_prepare_gcry(); - mont_modmult_tests(); + gpuv_test_montg(); - rsa_tests(); + gpuv_test(); - reference_tests(); + gpuv_test_ref(); diff --git a/source/montgomery-test.c b/source/montgomery-test.c @@ -1,375 +0,0 @@ -// -// montgomery-test.c -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 28.11.2023. 
-// - -#include "montgomery-test.h" - -#include <gmp.h> // has been adapted - -void mont_pairs_from_files(void *bases, unsigned long *b_off, - void *exponents, unsigned long *e_off, - void *moduli, unsigned long *m_off, - void *signatures, unsigned long *s_off, - unsigned long *pks, - unsigned long *n) { - - char *bases_t = bases; - char *exponents_t = exponents; - char *moduli_t = moduli; - char *signatures_t = signatures; - - FILE *pkfile; - FILE *msfile; - - pkfile = fopen("lib-gpu-generate/publickey.txt", "r"); - msfile = fopen("lib-gpu-generate/msgsig.txt", "r"); - - if (pkfile == NULL || msfile == NULL) { - printf("Auxiliary files not found."); - abort(); - } - - - int i = 0; - - unsigned long b_offset = 0; - unsigned long e_offset = 0; - unsigned long m_offset = 0; - unsigned long s_offset = 0; - - while (1) { - - char n_buf[2048]; // need to be 0 - char e_buf[2048]; - - memset(n_buf, 0, 2048); - memset(e_buf, 0, 2048); - - unsigned long lastIndex = 0; - - if (fscanf(pkfile, "%s %s %lu", n_buf,e_buf, &lastIndex) == -1) - break; - - mpz_t n, e; - - mpz_init_set_str(n,n_buf,16); - mpz_init_set_str(e,e_buf,16); - - memcpy(&moduli_t[m_offset], n, sizeof(mpz_t)); - memcpy(&exponents_t[e_offset], e, sizeof(mpz_t)); - - m_off[i] = m_offset; - e_off[i] = e_offset; - - m_offset += sizeof(mpz_t); - e_offset += sizeof(mpz_t); - - pks[i] = lastIndex; - - i++; - - // break; // testing with just one - } - - int j = 0; - - while (1) { - - char m_buf[2048]; // temp storage, large enough - char s_buf[2048]; - - memset(m_buf, 0, 2048); - memset(s_buf, 0, 2048); - - if (fscanf(msfile, "%s %s", m_buf,s_buf) == -1) - break; - - mpz_t m, s; - - mpz_init_set_str(m,m_buf,16); - mpz_init_set_str(s,s_buf,16); - - memcpy(&bases_t[b_offset], m, sizeof(mpz_t)); - memcpy(&signatures_t[s_offset], s, sizeof(mpz_t)); - - b_off[j] = b_offset; - s_off[j] = s_offset; - - b_offset += sizeof(mpz_t); - s_offset += sizeof(mpz_t); - - j++; - - // break; // testing with just one - - } - - 
fclose(pkfile); - fclose(msfile); - - *n = j; -} - - - - -int mont_verify_pairs_with_opencl(void *bases, unsigned long *b_off, - void *exponents, unsigned long *e_off, - void *moduli, unsigned long *m_off, - void *signatures, unsigned long *s_off, - const unsigned long *pks, - const unsigned long n, - unsigned long *result) { - - int err; // error code returned from api calls - - size_t global; // global domain size for our calculation - size_t local; // local domain size for our calculation - - // MARK: this part, I can cache for the library - - cl_platform_id platform = select_platform(0, false); - cl_device_id device_id = select_device (platform); - cl_context context = create_compute_context (device_id); - cl_command_queue commands = create_command_queue (device_id, context); - cl_program program = compile_program (device_id, context, "montgomery.cl"); - cl_kernel kernel = create_kernel (program, "montgomery"); - - // Create the input and output arrays in device memory for our calculation - - cl_mem sig_mem; - cl_mem exp_mem; - cl_mem mod_mem; - cl_mem comp_mem; - - cl_mem sig_len; - cl_mem exp_len; - cl_mem mod_len; - cl_mem comp_len; - - cl_mem pks_indices; - - cl_mem valid; // needs to be a buffer because it goes out - - unsigned long signature_is_valid = 0; - - unsigned long pk = 0; - - while (1) { - if (pks[pk] + 1 == n) - break; - pk++; - } - - unsigned long len = sizeof(mpz_t); - - - mod_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * (pk + 1), NULL, NULL); - exp_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * (pk + 1), NULL, NULL); - - sig_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, len * n, NULL, NULL); - comp_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, len * n, NULL, NULL); // the base, to compare whether we get the same signature - - mod_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL); - exp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, 
NULL); - - sig_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); - comp_len = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); - - pks_indices = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL); - valid = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); - - - - if (!sig_mem || !exp_mem || !mod_mem || !comp_mem || !valid ) - { - printf("Error: Failed to allocate device memory!\n"); - exit(1); - } - - // Write our data set into the input array in device memory - // - err = clEnqueueWriteBuffer(commands, sig_mem, CL_TRUE, 0, len * n, signatures, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_off, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(commands, exp_mem, CL_TRUE, 0, len * (pk + 1), exponents, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, exp_len, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), e_off, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(commands, mod_mem, CL_TRUE, 0, len * (pk + 1), moduli, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, mod_len, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), m_off, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(commands, comp_mem, CL_TRUE, 0, len * n, bases, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_off, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(commands, pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - - // Set the arguments to our compute kernel - // - err = 0; - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &sig_mem); - err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &sig_len); - err 
|= clSetKernelArg(kernel, 2, sizeof(cl_mem), &exp_mem); - err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &exp_len); - err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &mod_mem); - err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &mod_len); - err |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &comp_mem); - err |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &comp_len); - err |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &valid); - err |= clSetKernelArg(kernel, 9, sizeof(cl_mem), &pks_indices); - err |= clSetKernelArg(kernel, 10, sizeof(unsigned long), &n); - - //err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel arguments! %d\n", err); - exit(1); - } - - // Get the maximum work group size for executing the kernel on the device - // - err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to retrieve kernel work group info! %d\n", err); - exit(1); - } - - // Execute the kernel over the entire range of our 1d input data set - // using the maximum number of work group items for this device - // - - global = n; - local = 1; - - err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); - if (err) - { - printf("Error: Failed to execute kernel!\n"); - return EXIT_FAILURE; - } - - printf("KERNEL IS EXECUTING...\n"); - - struct timespec t1, t2; - - clock_gettime(CLOCK_REALTIME, &t1); - - - // Wait for the command commands to get serviced before reading back results - // - clFinish(commands); - - // Read back the results from the device to verify the output - // - //err = clEnqueueReadBuffer( commands, res_mem, CL_TRUE, 0, res_len, res_buf, 0, NULL, NULL ); - err = clEnqueueReadBuffer( commands, valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - printf("Error: Failed to read output array! 
%d\n", err); - exit(1); - } - - clock_gettime(CLOCK_REALTIME, &t2); - - printf("GPU verification (Montgomery) took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((999999999 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000); - - *result = n - signature_is_valid; - - // Shutdown and cleanup - // - clReleaseMemObject(comp_mem); - clReleaseMemObject(exp_mem); - clReleaseMemObject(mod_mem); - clReleaseMemObject(sig_mem); - - clReleaseMemObject(comp_len); - clReleaseMemObject(exp_len); - clReleaseMemObject(mod_len); - clReleaseMemObject(sig_len); - - clReleaseProgram(program); - clReleaseKernel(kernel); - clReleaseCommandQueue(commands); - clReleaseContext(context); - - return 0; - -} - - -int mont_rsa_tests(void) { - - unsigned long pairs = number_of_pairs(); - - long str_sz = sizeof(mpz_t) * pairs; - - void *b = malloc(str_sz); - void *e = malloc(str_sz); - void *m = malloc(str_sz); - void *s = malloc(str_sz); - - unsigned long *b_off = malloc(pairs * sizeof(unsigned long)); - unsigned long *e_off = malloc(pairs * sizeof(unsigned long)); - unsigned long *m_off = malloc(pairs * sizeof(unsigned long)); - unsigned long *s_off = malloc(pairs * sizeof(unsigned long)); - - memset(b, 0, str_sz); - memset(e, 0, str_sz); - memset(m, 0, str_sz); - memset(s, 0, str_sz); - - memset(b_off, 0, pairs * sizeof(unsigned long)); - memset(e_off, 0, pairs * sizeof(unsigned long)); - memset(m_off, 0, pairs * sizeof(unsigned long)); - memset(s_off, 0, pairs * sizeof(unsigned long)); - - unsigned long *pks = malloc(pairs * sizeof(unsigned long)); - - memset(pks, 0, pairs * sizeof(unsigned long)); - - - - mont_pairs_from_files(b, b_off, e, e_off, m, m_off, s, s_off, pks, - &pairs); - - - printf("VERIFYING %lu SIGNATURES...\n", pairs); - - unsigned long result = 0; - - - mont_verify_pairs_with_opencl(s, s_off, e, e_off, m, m_off, b, b_off, - pks, pairs, &result); - - - - - if (result == pairs) { - 
printf("VERIFICATION RESULT: %lu - OK\n\n",result); - } else { - printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result); - } - - - free(b); - free(e); - free(m); - free(s); - - free(b_off); - free(e_off); - free(m_off); - free(s_off); - - // return 1 for success, 0 for failure - return result == pairs ? 1 : 0; -} diff --git a/source/montgomery-test.h b/source/montgomery-test.h @@ -1,15 +0,0 @@ -// -// montgomery-test.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 28.11.2023. -// - -#ifndef montgomery_test_h -#define montgomery_test_h - -#include "util.h" - -int mont_rsa_tests(void); - -#endif /* montgomery_test_h */ diff --git a/source/montgomery.c b/source/montgomery.c @@ -1,431 +0,0 @@ -// -// montgomery.c -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 14.11.2023. -// - -#include "montgomery.h" -#include <math.h> - - - -#define BITS_PER_DIGIT (sizeof(gpu_register) * 8) -#define HIBITMASK 0x8000000000000000 -#define MAX_DIGIT 0xFFFFFFFFFFFFFFFF - -#define R 32 - -int mult(gpu_register p[2], gpu_register x, gpu_register y) -{ - - /* Use a 64-bit temp for product */ - //ulong t = (ulong)x * (ulong)y; - /* then split into two parts */ - - __int128_t t = (__int128_t)x * (__int128_t)y; - - p[1] = (gpu_register)(t >> BITS_PER_DIGIT); - p[0] = (gpu_register)t; - - return 0; -} - - -int multiply( gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits) -{ - /* Computes product w = u * v - where u, v are multiprecision integers of ndigits each - and w is a multiprecision integer of 2*ndigits - - Ref: Knuth Vol 2 Ch 4.3.1 p 268 Algorithm M. - */ - - gpu_register k, t[2]; - size_t i, j, m, n; - - //assert(w != u && w != v); - - m = n = ndigits; - - /* Step M1. Initialise */ - for (i = 0; i < 2 * m; i++) - w[i] = 0; - - for (j = 0; j < n; j++) - { - /* Step M2. Zero multiplier? */ - if (v[j] == 0) - { - w[j + m] = 0; - } - else - { - /* Step M3. Initialise i */ - k = 0; - for (i = 0; i < m; i++) - { - /* Step M4. 
Multiply and add */ - /* t = u_i * v_j + w_(i+j) + k */ - mult(t, u[i], v[j]); - - t[0] += k; - if (t[0] < k) - t[1]++; - t[0] += w[i+j]; - if (t[0] < w[i+j]) - t[1]++; - - w[i+j] = t[0]; - k = t[1]; - } - /* Step M5. Loop on i, set w_(j+m) = k */ - w[j+m] = k; - } - } /* Step M6. Loop on j */ - - return 0; -} - - -int square( gpu_register *w, gpu_register *x, size_t ndigits) -/* New in Version 2.0 */ -{ - /* Computes square w = x * x - where x is a multiprecision integer of ndigits - and w is a multiprecision integer of 2*ndigits - - Ref: Menezes p596 Algorithm 14.16 with errata. - */ - - gpu_register k, p[2], u[2], cbit, carry; - size_t i, j, t, i2, cpos; - - t = ndigits; - - /* 1. For i from 0 to (2t-1) do: w_i = 0 */ - i2 = t << 1; - for (i = 0; i < i2; i++) - w[i] = 0; - - carry = 0; - cpos = i2-1; - /* 2. For i from 0 to (t-1) do: */ - for (i = 0; i < t; i++) - { - /* 2.1 (uv) = w_2i + x_i * x_i, w_2i = v, c = u - Careful, w_2i may be double-prec - */ - i2 = i << 1; /* 2*i */ - mult(p, x[i], x[i]); - p[0] += w[i2]; - if (p[0] < w[i2]) - p[1]++; - k = 0; /* p[1] < b, so no overflow here */ - if (i2 == cpos && carry) - { - p[1] += carry; - if (p[1] < carry) - k++; - carry = 0; - } - w[i2] = p[0]; - u[0] = p[1]; - u[1] = k; - - /* 2.2 for j from (i+1) to (t-1) do: - (uv) = w_{i+j} + 2x_j * x_i + c, - w_{i+j} = v, c = u, - u is double-prec - w_{i+j} is dbl if [i+j] == cpos - */ - k = 0; - for (j = i+1; j < t; j++) - { - /* p = x_j * x_i */ - mult(p, x[j], x[i]); - /* p = 2p <=> p <<= 1 */ - cbit = (p[0] & HIBITMASK) != 0; - k = (p[1] & HIBITMASK) != 0; - p[0] <<= 1; - p[1] <<= 1; - p[1] |= cbit; - /* p = p + c */ - p[0] += u[0]; - if (p[0] < u[0]) - { - p[1]++; - if (p[1] == 0) - k++; - } - p[1] += u[1]; - if (p[1] < u[1]) - k++; - /* p = p + w_{i+j} */ - p[0] += w[i+j]; - if (p[0] < w[i+j]) - { - p[1]++; - if (p[1] == 0) - k++; - } - if ((i+j) == cpos && carry) - { /* catch overflow from last round */ - p[1] += carry; - if (p[1] < carry) - k++; - carry = 0; - } 
- /* w_{i+j} = v, c = u */ - w[i+j] = p[0]; - u[0] = p[1]; - u[1] = k; - } - /* 2.3 w_{i+t} = u */ - w[i+t] = u[0]; - /* remember overflow in w_{i+t} */ - carry = u[1]; - cpos = i+t; - } - - /* (NB original step 3 deleted in Menezes errata) */ - - /* Return w */ - - return 0; -} - -gpu_register add( gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits) -{ - /* Calculates w = u + v - where w, u, v are multiprecision integers of ndigits each - Returns carry if overflow. Carry = 0 or 1. - - Ref: Knuth Vol 2 Ch 4.3.1 p 266 Algorithm A. - */ - - gpu_register k; - size_t j; - - //assert(w != v); - - /* Step A1. Initialise */ - k = 0; - - for (j = 0; j < ndigits; j++) - { - /* Step A2. Add digits w_j = (u_j + v_j + k) - Set k = 1 if carry (overflow) occurs - */ - w[j] = u[j] + k; - if (w[j] < k) - k = 1; - else - k = 0; - - w[j] += v[j]; - if (w[j] < v[j]) - k++; - - } /* Step A3. Loop on j */ - - return k; /* w_n = k */ -} - -gpu_register subtract(gpu_register *w, gpu_register *u, gpu_register *v, size_t ndigits) -{ - /* Calculates w = u - v where u >= v - w, u, v are multiprecision integers of ndigits each - Returns 0 if OK, or 1 if v > u. - - Ref: Knuth Vol 2 Ch 4.3.1 p 267 Algorithm S. - */ - - gpu_register k; - size_t j; - - - - /* Step S1. Initialise */ - k = 0; - - for (j = 0; j < ndigits; j++) - { - /* Step S2. Subtract digits w_j = (u_j - v_j - k) - Set k = 1 if borrow occurs. - */ - w[j] = u[j] - k; - if (w[j] > MAX_DIGIT - k) - k = 1; - else - k = 0; - - w[j] -= v[j]; - if (w[j] > MAX_DIGIT - v[j]) - k++; - - } /* Step S3. 
Loop on j */ - - return k; /* Should be zero if u >= v */ -} - -void equal( gpu_register *a, gpu_register *b, size_t ndigits) -{ /* Sets a = b */ - size_t i; - - for (i = 0; i < ndigits; i++) - { - a[i] = b[i]; - } -} - - -void erase_all( gpu_register *a, size_t n) -{ - - for (int i = 0; i < n; i++) - { - a[i] = 0; - } - -} - -void shift_right(gpu_register *r, int n) { - - for (int i = 0; i < R+1; i++) { - - r[i] = r[i + n]; - //r[i + n] = 0; - - } - -} - -// 1 if r > l ; -1 if r < l; == 0 -int compare(gpu_register *r, gpu_register *l, int n) { - - int x = 0; - for (int i = n - 1; i >= 0; i--) { - x = r[i] > l[i]; - if (x) return 1; - x = r[i] < l[i]; - if (x) return -1; - } - return 0; -} - -int testbit(gpu_register e, int i) { - - return (e & (0x1 << (gpu_register)i) ) > 0 ? 1 : 0; - -} - -void montMul( gpu_register *ret, - gpu_register *a, gpu_register *b, - gpu_register *ni, gpu_register *n, - gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3 - ) { - - multiply(tmp_1,a,b,R); - multiply(tmp_2,tmp_1,ni,R); - multiply(tmp_3,tmp_2,n,R); - - add(tmp_2,tmp_1,tmp_3,R*2+1); // MARK: something gets lost in the carry - - shift_right(tmp_2, R); - - erase_all(tmp_3, R*2); - equal(tmp_3, n, R); - - if (compare(tmp_2, tmp_3, R+1) >= 0) { - subtract(ret, tmp_2, tmp_3, R+1); - } else { - equal(ret, tmp_2, R); - } - -} - -void montSqr( gpu_register *ret, - gpu_register *a, - gpu_register *ni, gpu_register *n, - gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3 - ) { - - square(tmp_1,a,R); - multiply(tmp_2,tmp_1,ni,R); - multiply(tmp_3,tmp_2,n,R); - - add(tmp_2,tmp_1,tmp_3,R*2+1); - - shift_right(tmp_2, R); - - erase_all(tmp_3, R*2); - equal(tmp_3, n, R); - - if (compare(tmp_2, tmp_3, R+1) >= 0) { - subtract(ret, tmp_2, tmp_3, R+1); - } else { - equal(ret, tmp_2, R); - } - - - -} - - - void mont( gpu_register *x, gpu_register *m, - gpu_register *res, gpu_register *n, // res is not needed, we write the result in x - gpu_register *ni, gpu_register *exp, - 
gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3, - gpu_register *pks//, unsigned long count // invalid is not needed either - ) -{ - - size_t i = 0; - - int offs = 0; // the size of one number - - int pk = 0; // das funktioniert so nicht – die globale id wird grösser, ohne dass noch weitere elemente in pks sind... - - -// while (1) { -// if (pks[pk] >= i) -// break; -// pk++; -// } - - - int k = ceil(log2((float)exp[pk] + (float)1)); - - - - for (int j = k - 1; j >= 0; j--) { - - montSqr(res, x, ni, n, tmp_1, tmp_2, tmp_3); - - if (testbit(exp[pk], j)) { - - equal(x, res, R); - erase_all(tmp_1,R * 2); - erase_all(tmp_2,R * 2); - erase_all(tmp_3,R * 2); - - montMul(res, x, m, ni, n, tmp_1, tmp_2, tmp_3); - - - } - - equal(x, res, R); - erase_all(tmp_1,R * 2); - erase_all(tmp_2,R * 2); - erase_all(tmp_3,R * 2); - // clear / reset the temps... - } - - equal(x, res, R); - erase_all(m,R); - m[0] = 1; // reuse m, to convert out of montgomery - montMul(res, x, m, ni, n, tmp_1, tmp_2, tmp_3); - - -} diff --git a/source/montgomery.cl b/source/montgomery.cl @@ -1 +0,0 @@ -../xcode/montgomery.cl -\ No newline at end of file diff --git a/source/montgomery.h b/source/montgomery.h @@ -1,27 +0,0 @@ -// -// montgomery.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 14.11.2023. -// - -#ifndef montgomery_h -#define montgomery_h - -#include <stdio.h> - -#include <gmp.h> - -#include <assert.h> - -typedef u_int64_t gpu_register; - -void mont( gpu_register *x, gpu_register *m, - gpu_register *res, gpu_register *n, // res is not needed, we write the result in x - gpu_register *ni, gpu_register *exp, - gpu_register *tmp_1, gpu_register *tmp_2, gpu_register *tmp_3, - gpu_register *pks//, unsigned long count // invalid is not needed either -); - - -#endif /* montgomery_h */ diff --git a/source/montmodmult.c b/source/montmodmult.c @@ -1,662 +0,0 @@ -// -// montmodmult.c -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 16.12.2023. 
-// - -#include "montmodmult.h" -#include "util.h" -#include <pthread.h> - -static unsigned long len_in_bytes = 0; - -#define ORDER -1 // I think we need to do this, because we want to write it in the 'wrong' way -#define END 0 - -#define BIT_LENGTH (2048) - -#define BITS 64 - -// sizes are always the same 32 units for all, except exp_buf -void montmodmult_pairs_from_files(void *x_buf, void *m_buf, - void *r_1_buf, - void *n_buf, void *ni_buf, - void *msg_buf, - void *exp_buf, - //void *mod_buf, - void *s_buf, - unsigned long *pks, unsigned long *n) { - - FILE * pk; - FILE * ms; - - pk = fopen("lib-gpu-generate/publickey.txt", "r"); - ms = fopen("lib-gpu-generate/msgsig.txt", "r"); - - if (pk == NULL || ms == NULL) { - printf("Auxiliary files not found."); - abort(); - } - - fseek (ms, 0, SEEK_END); - long ms_l = ftell(ms); - fseek (ms, 0, SEEK_SET); - char *ms_ptr = malloc(ms_l); - char *ms_ptr_rest = malloc(ms_l); - if (ms_ptr || ms_ptr_rest) - { - fread (ms_ptr, 1, ms_l, ms); - memcpy(ms_ptr_rest, ms_ptr, ms_l); - } - fclose (ms); - - fseek (pk, 0, SEEK_END); - long pk_l = ftell(pk); - fseek (pk, 0, SEEK_SET); - char *pk_ptr = malloc(pk_l); - char *pk_ptr_rest = malloc(pk_l); - if (pk_ptr && pk_ptr_rest) - { - fread (pk_ptr, 1, pk_l, pk); - memcpy(pk_ptr_rest, pk_ptr, pk_l); - } - fclose (pk); - - gpu_register *n_buf_t = n_buf; - gpu_register *msg_buf_t = msg_buf; - gpu_register *s_buf_t = s_buf; - gpu_register *exp_buf_t = exp_buf; - - int len = (BIT_LENGTH / 8) / sizeof(gpu_register); - - char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest); - char* signature = strtok_r(0, "\n", &ms_ptr_rest); - char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest); - char* exponent = strtok_r(0, "\n", &pk_ptr_rest); - char* offs = strtok_r(0, "\n", &pk_ptr_rest); - - int i = 0; - int j = 0; - - mpz_t e,mod,msg,s; - - mpz_init(e); - mpz_init(mod); - mpz_init(msg); - mpz_init(s); - - while (message != NULL && signature != NULL) { - - if (i == 0 || pks[j - 1] < i) { - - 
mpz_set_str(mod,modulus,16); - mpz_set_str(e,exponent,16); - - pks[j] = atoi(offs); - - modulus = strtok_r(0, "\n", &pk_ptr_rest); - exponent = strtok_r(0, "\n", &pk_ptr_rest); - offs = strtok_r(0, "\n", &pk_ptr_rest); - - mpz_export(&n_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, mod); - mpz_export(&exp_buf_t[j], NULL, ORDER, sizeof(gpu_register), END, 0, e); - - - j++; - - } - - mpz_set_str(msg,message,16); - mpz_set_str(s,signature,16); - - message = strtok_r(0, "\n",&ms_ptr_rest); - signature = strtok_r(0, "\n",&ms_ptr_rest); - - mpz_export(&msg_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, msg); - mpz_export(&s_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, s); - - i++; - } - - mpz_clear(e); - mpz_clear(mod); - mpz_clear(msg); - mpz_clear(s); - - - - *n = i; - -} - -struct thread_args { - - void *x_buf; - void *m_buf; - void *r_1_buf; - void *n_buf; - void *ni_buf; - void *s_buf; - unsigned long *pks; - unsigned long n_start; - unsigned long n_end; -}; - -void *convert_thread(void * vargp) { - - struct thread_args *args = vargp; - - int len = (BIT_LENGTH / 8) / sizeof(gpu_register); - - - int j = 0; - - while(1) { - if (args->pks[j] > args->n_start) - break; - j++; - } - - - //printf("pks[%i] == %i, start at: %i, end at: %i\n",j,args->pks[j], args->n_start, args->n_end); - - gpu_register *s_buf_t = args->s_buf; - gpu_register *n_buf_t = args->n_buf; - - gpu_register *x_buf_t = args->x_buf; - gpu_register *m_buf_t = args->m_buf; - gpu_register *r_1_buf_t = args->r_1_buf; - gpu_register *ni_buf_t = args->ni_buf; - - mpz_t mod, s; - mpz_init(s); - mpz_init(mod); - - mpz_t r, r_1, ni, M, x; - - mpz_init(r); - mpz_init(r_1); - mpz_init(ni); - mpz_init(M); - mpz_init(x); - - mpz_t one; // helper variable - mpz_init_set_si(one,1); - - mpz_set_si(one, 1); - mpz_mul_2exp(r,one,BIT_LENGTH); // r - - int start = (int)args->n_start; - - - - for(int i = start; i < args->n_end; i++) { - - if (i == start || args->pks[j - 1] < i) { - - 
mpz_import(mod, len, ORDER, sizeof(gpu_register), END, 0, &n_buf_t[len * j]); - - mpz_gcdext(one, r_1, ni, r, mod); // set r_1 and ni - - int sgn = mpz_sgn(r_1); - - mpz_abs(r_1, r_1); - mpz_abs(ni, ni); - - if (sgn == -1) { - mpz_sub(ni, r, ni); - mpz_sub(r_1, mod, r_1); - } - - - mpz_export(&ni_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, ni); - mpz_export(&r_1_buf_t[len * j], NULL, ORDER, sizeof(gpu_register), END, 0, r_1); - - - - j++; - } - - mpz_import(s, len, ORDER, sizeof(gpu_register), END, 0, &s_buf_t[len * i]); - - // set x (the number to 'square' (multiply by itself)) - mpz_mul(M, s, r); - mpz_mod(M, M, mod); - - mpz_mod(x, r, mod); - - - mpz_export(&x_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, x); - mpz_export(&m_buf_t[len * i], NULL, ORDER, sizeof(gpu_register), END, 0, M); - - } - - mpz_clear(r); - mpz_clear(r_1); - mpz_clear(ni); - mpz_clear(M); - mpz_clear(x); - - mpz_clear(one); - - return NULL; - -} - -void modmult_opencl_convert(struct gpu_state_alt *state, void *x_buf, void *m_buf, - void *r_1_buf, - void *n_buf, void *ni_buf, - void *msg_buf, - void *exp_buf, - //void *mod_buf, - void *s_buf, - unsigned long *pks, unsigned long n) { - - long nr_of_threads = 4; - - #if __APPLE__ || unix - - nr_of_threads = sysconf(_SC_NPROCESSORS_ONLN); - - #elif _WIN32 - - SYSTEM_INFO sysinfo; - GetSystemInfo(&sysinfo); - nr_of_threads = sysinfo.dwNumberOfProcessors; - - #endif - - - struct timespec p1, p2; - - clock_gettime(CLOCK_REALTIME, &p1); - - pthread_t tid[nr_of_threads]; - // not the best, but it is safe - int ids[nr_of_threads]; - - struct thread_args args[nr_of_threads]; - - unsigned long range = n / nr_of_threads; - - for (int i = 0; i < nr_of_threads - 1; i++) - { - args[i].n_start = i * range; - args[i].n_end = (i + 1) * range; - } - - // last one takes the 'rest' - args[nr_of_threads - 1].n_start = (nr_of_threads - 1) * range; - args[nr_of_threads - 1].n_end = n; - - - for (int i = 0; i < nr_of_threads; i++) { - - 
args[i].m_buf = m_buf; - args[i].n_buf = n_buf; - args[i].ni_buf = ni_buf; - args[i].pks = pks; - args[i].r_1_buf = r_1_buf; - args[i].s_buf = s_buf; - args[i].x_buf = x_buf; - - ids[i] = i; - int err = pthread_create(&tid[i], NULL, convert_thread, (void *)&args[i]); - if ( err != 0 ) - printf("Error creating threads"); - } - - for (int j = 0; j < nr_of_threads; j++) { - int err = pthread_join(tid[j], NULL); - if ( err != 0 ) - printf("Error joining threads"); - } - - clock_gettime(CLOCK_REALTIME, &p2); - - state->p.tv_sec += ( p2.tv_nsec < p1.tv_nsec ? p2.tv_sec - (p1.tv_sec + 1) : p2.tv_sec - p1.tv_sec ); - state->p.tv_nsec += ( p2.tv_nsec < p1.tv_nsec ? ((999999999 - p1.tv_nsec) + p2.tv_nsec) : (p2.tv_nsec - p1.tv_nsec) ) / 1000; - - -} - -int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state, - void *x_buf, void *m_buf, - void *r_1_buf, - void *n_buf, void *ni_buf, - void *msg_buf, - void *exp_buf, - // void *mod_buf, - void *s_buf, - unsigned long *pks, unsigned long n - ) { - - - - modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n); - - - int err; // error code returned from api calls - - unsigned long pk = 0; - - while (1) { - if (pks[pk] + 1 >= n) - break; - pk++; - } - - unsigned long len = len_in_bytes; - - state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); - state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); - state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); - state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); - - state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, n * sizeof(gpu_register), NULL, NULL); - - state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); - - state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 2), NULL, NULL); // plus 2 because the first index will 
contain how many elements are in the array - - if (!state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem || !state->exp_mem) - { - printf("Error: Failed to allocate device memory!\n"); - exit(1); - } - - // Write our data set into the input array in device memory - // - err = clEnqueueWriteBuffer(info->commands, state->x_mem, CL_TRUE, 0, len, x_buf, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->m_mem, CL_TRUE, 0, len, m_buf, 0, NULL, NULL); - - //err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, len, res, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->n_mem, CL_TRUE, 0, len, n_buf, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->ni_mem, CL_TRUE, 0, len, ni_buf, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0, n * sizeof(gpu_register), exp_buf, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->msg_mem, CL_TRUE, 0, len, msg_buf, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, sizeof(unsigned long), sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long), &pk, 0, NULL, NULL); - - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - - // Set the arguments to our compute kernel - // - err = 0; - err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->x_mem); - err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->m_mem); - err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->n_mem); - err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->ni_mem); - err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->exp_mem); - err |= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->msg_mem); - err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->pks_indices); - - if (err != 
CL_SUCCESS) - { - printf("RSA-Error: Failed to set kernel arguments! %d\n", err); - exit(1); - } - - state->total = n; - - - return 0; - -} - -int modmult_opencl_exec_kernel(struct gpu_info *info, struct gpu_state_alt *state) { - - size_t global; - // size_t local = 1; - int err; - - global = state->total; // has to be exactly the amount of signatures we want to verify - - // measure from the first call to the kernel... - if (state->skip) { - state->skip = false; - clock_gettime(CLOCK_REALTIME, &state->t1); - } - - err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - if (err) - { - printf("Error: Failed to execute kernel!\n"); - return EXIT_FAILURE; - } - - return 0; - -} - -unsigned long modmult_opencl_results(struct gpu_info *info, struct gpu_state_alt *state, bool timed, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) { - - if (state->skip) { - // reset skip in the kernel execution - return 0; - } - - int err; - - // Wait for the command commands to get serviced before reading back results - // - err = clFinish(info->commands); - if (err != CL_SUCCESS) - { - printf("Error: Kernel failure! %d\n", err); - exit(1); - } - - unsigned long results = 0; - - // Read back the results from the device to verify the output - err = clEnqueueReadBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - printf("Error: Failed to read output array! %d\n", err); - exit(1); - } - - if (timed) { - // stop measuring after the last command has been read - clock_gettime(CLOCK_REALTIME, &state->t2); - - printf("Preparation (on CPU) took \t%ld.%06ld s\n", state->p.tv_sec, state->p.tv_nsec); - - long sec = ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ); - long nanosec = ( state->t2.tv_nsec < state->t1.tv_nsec ? 
((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000; - - printf("GPU verification took \t%ld.%06ld s\n", sec, nanosec); - - printf("Total time: \t\t%ld.%06ld s\n", sec + state->p.tv_sec, nanosec + state->p.tv_nsec); - - } - - - return state->total - results; - -} - -void modmult_opencl_cleanup(struct gpu_info *info) { - - clReleaseProgram(info->program); - clReleaseKernel(info->kernel); - clReleaseCommandQueue(info->commands); - clReleaseContext(info->context); - -} - -void modmult_opencl_release(struct gpu_state_alt *state) { - - clReleaseMemObject(state->x_mem); - clReleaseMemObject(state->m_mem); - clReleaseMemObject(state->n_mem); - clReleaseMemObject(state->ni_mem); - clReleaseMemObject(state->exp_mem); - - clReleaseMemObject(state->msg_mem); - - clReleaseMemObject(state->pks_indices); -} - -// MARK: for library - -void modmult_gpu_init(struct gpu_info *info, struct gpu_state_alt *state) { - - info->platform = select_platform(0, false); - info->device_id = select_device (info->platform); - info->context = create_compute_context (info->device_id); - info->commands = create_command_queue (info->device_id, info->context); - info->program = compile_program (info->device_id, info->context, "montmodmult.cl"); - info->kernel = create_kernel (info->program, "mont"); - - state->result = 0; - state->total = 0; - state->skip = true; - - state->p.tv_nsec = 0; - state->p.tv_sec = 0; - - int err = 0; - - unsigned long results = 0; - - state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->res_mem, CL_TRUE, 0, sizeof(unsigned long), &results, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - - err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->res_mem); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel arguments! 
%d\n", err); - exit(1); - } -} - -void modmult_gpu_execute(struct gpu_info *info, struct gpu_state_alt *state, - void *x_buf, void *m_buf, - void *r_1_buf, - void *n_buf, void *ni_buf, - void *msg_buf, - void *exp_buf, - //void *mod_buf, - void *s_buf, - unsigned long *pks, unsigned long n) { - - modmult_opencl_prepare(info, state, - x_buf, m_buf, - r_1_buf, - n_buf, ni_buf, - msg_buf, exp_buf, s_buf, - pks, n - ); // prepares the next batch of signatures on CPU, naturally blocks until it's finished - - state->result += modmult_opencl_results(info, state, false, msg_buf, r_1_buf, n_buf, s_buf, n); // waits for kernel, if it is not ready yet - modmult_opencl_exec_kernel(info,state); // start kernel (returns immediately) - modmult_opencl_release(state); // release buffers - - - -} - -unsigned long modmult_gpu_finish(struct gpu_info *info, struct gpu_state_alt *state, void * msg_buf, void * r_1_buf, void * n_buf, void* s_buf, unsigned long n) { - - state->result += modmult_opencl_results(info,state,true,msg_buf,r_1_buf,n_buf, s_buf, n); - - modmult_opencl_cleanup(info); - - unsigned long res = state->result; - - state->result = 0; // reset result - - return res; - -} - - -int mont_modmult_tests(void) { - - unsigned long pairs = number_of_pairs(); // returns an estimation of pairs - - unsigned long digit_sz = (BIT_LENGTH / 8) * pairs; - - len_in_bytes = digit_sz; - - unsigned long arr_sz = pairs * sizeof(unsigned long); - - gpu_register *x_buf = malloc(digit_sz); - gpu_register *m_buf = malloc(digit_sz); - gpu_register *r_1_buf = malloc(digit_sz); - gpu_register *n_buf = malloc(digit_sz); - gpu_register *ni_buf = malloc(digit_sz + pairs); - gpu_register *msg_buf = malloc(digit_sz); - gpu_register *s_buf = malloc(digit_sz); - //gpu_register *mod_buf = malloc(digit_sz); - gpu_register *exp_buf = malloc(pairs * sizeof(gpu_register)); - - memset(x_buf, 0, digit_sz); - memset(m_buf, 0, digit_sz); - memset(r_1_buf, 0, digit_sz); - memset(n_buf, 0, digit_sz); - memset(ni_buf, 
0, digit_sz); - memset(msg_buf, 0, digit_sz); - memset(s_buf, 0, digit_sz); - //memset(mod_buf, 0, digit_sz); - memset(exp_buf, 0, pairs * sizeof(gpu_register)); - - unsigned long *pks = malloc(arr_sz); - - memset(pks, 0, arr_sz); - - printf("READING KEYS...\n"); - - montmodmult_pairs_from_files(x_buf, m_buf, - r_1_buf, - n_buf, ni_buf, - msg_buf, - exp_buf, - - s_buf, - pks, &pairs); - - printf("VERIFYING %lu SIGNATURES...\n", pairs); - - struct gpu_info info; - struct gpu_state_alt state; - - modmult_gpu_init(&info, &state); - - modmult_gpu_execute(&info, &state, - x_buf, m_buf, - r_1_buf, - n_buf, ni_buf, - msg_buf, - exp_buf, - // mod_buf, - s_buf, - pks, pairs); - - unsigned long res = modmult_gpu_finish(&info, &state, msg_buf, r_1_buf, n_buf,s_buf, pairs); - - if (res == pairs) { - printf("VERIFICATION RESULT: OK\n\n"); - } else { - printf("VERIFICATION RESULT: NOT OK!\n"); - printf("At least %lu signatures were invalid.\n\n",state.total - res); - } - - return 0; -} diff --git a/source/montmodmult.cl b/source/montmodmult.cl @@ -1 +0,0 @@ -../xcode/montmodmult.cl -\ No newline at end of file diff --git a/source/montmodmult.h b/source/montmodmult.h @@ -1,20 +0,0 @@ -// -// montmodmult.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 16.12.2023. -// - -#ifndef montmodmult_h -#define montmodmult_h - -#include <stdio.h> -#include <stdint.h> -#include <gmp.h> - -int mont_modmult_tests(void); - - -typedef uint64_t gpu_register; - -#endif /* montmodmult_h */ diff --git a/source/reference-test.c b/source/reference-test.c @@ -1,260 +0,0 @@ -// -// reference-test.c -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 06.12.2023. 
/*
 * Reads the whole file at path into a newly allocated, NUL-terminated buffer
 * and returns it; the caller frees it.
 *
 * Aborts the program if the file cannot be opened, measured, or buffered.
 */
static char *ref_read_text_file(const char *path) {

    FILE *f = fopen(path, "r");

    if (f == NULL) {
        printf("Auxiliary files not found.");
        abort();
    }

    long len = -1;

    if (fseek(f, 0, SEEK_END) == 0)
        len = ftell(f);

    if (len < 0 || fseek(f, 0, SEEK_SET) != 0) {
        printf("Could not determine the size of %s.", path);
        abort();
    }

    // one extra byte for the terminator strtok_r and strlen rely on
    char *text = malloc((size_t)len + 1);

    if (text == NULL) {
        printf("Out of memory while reading %s.", path);
        abort();
    }

    size_t got = fread(text, 1, (size_t)len, f);
    text[got] = '\0';

    fclose(f);

    return text;
}

/*
 * Loads the key file and the message/signature file as newline-separated
 * text and copies every field into the caller's buffers.
 *
 * lib-gpu-generate/publickey.txt is read as repeated groups of three lines:
 * modulus, exponent, and a decimal number. For group i the modulus is copied
 * to moduli at m_off[i], the exponent to exponents at e_off[i], and the
 * number is stored in pks[i].
 *
 * lib-gpu-generate/msgsig.txt is read as repeated groups of two lines:
 * message and signature. For group j the message is copied to bases at
 * b_off[j] and the signature to signatures at s_off[j].
 *
 * Every copied field is followed by a NUL byte; the *_off arrays hold the
 * byte offset at which each field starts. On return *n is the number of
 * message/signature groups read.
 *
 * NOTE(review): the capacity of the output buffers is not passed in, so the
 * caller must size them for the files being read - confirm against caller.
 * NOTE(review): pks[i] looks like the index of the last signature made with
 * key i - confirm against the generator.
 *
 * Aborts the program if either file cannot be read.
 */
void ref_pairs_from_files(char *bases, unsigned long *b_off,
                      char *exponents, unsigned long *e_off,
                      char *moduli, unsigned long *m_off,
                      char *signatures, unsigned long *s_off,
                      unsigned long *pks,
                      unsigned long *n) {

    char *pk_text = ref_read_text_file("lib-gpu-generate/publickey.txt");
    char *ms_text = ref_read_text_file("lib-gpu-generate/msgsig.txt");

    // strtok_r only writes to the save pointers, they need no storage of their own
    char *ms_save = NULL;
    char *pk_save = NULL;

    char *message = strtok_r(ms_text, "\n", &ms_save);
    char *signature = strtok_r(NULL, "\n", &ms_save);
    char *modulus = strtok_r(pk_text, "\n", &pk_save);
    char *exponent = strtok_r(NULL, "\n", &pk_save);
    char *offs = strtok_r(NULL, "\n", &pk_save);

    unsigned long b_offset = 0;
    unsigned long e_offset = 0;
    unsigned long m_offset = 0;
    unsigned long s_offset = 0;

    unsigned long i = 0;
    unsigned long j = 0;

    while (modulus != NULL && exponent != NULL && offs != NULL) {

        unsigned long n_buf_len = strlen(modulus);
        unsigned long e_buf_len = strlen(exponent);

        // + 1 copies the terminator as well
        memcpy(&moduli[m_offset], modulus, n_buf_len + 1);
        memcpy(&exponents[e_offset], exponent, e_buf_len + 1);

        m_off[i] = m_offset;
        e_off[i] = e_offset;

        m_offset += n_buf_len + 1;
        e_offset += e_buf_len + 1;

        pks[i] = strtoul(offs, NULL, 10);

        modulus = strtok_r(NULL, "\n", &pk_save);
        exponent = strtok_r(NULL, "\n", &pk_save);
        offs = strtok_r(NULL, "\n", &pk_save);

        i++;
    }

    while (message != NULL && signature != NULL) {

        unsigned long m_buf_len = strlen(message);
        unsigned long s_buf_len = strlen(signature);

        memcpy(&bases[b_offset], message, m_buf_len + 1);
        memcpy(&signatures[s_offset], signature, s_buf_len + 1);

        b_off[j] = b_offset;
        s_off[j] = s_offset;

        b_offset += m_buf_len + 1;
        s_offset += s_buf_len + 1;

        message = strtok_r(NULL, "\n", &ms_save);
        signature = strtok_r(NULL, "\n", &ms_save);

        j++;

    }

    // every token has been copied out, the file images are no longer needed
    free(ms_text);
    free(pk_text);

    *n = j;
}
b_off, e, e_off, m, m_off, s, s_off, pks, - &pairs); - - unsigned long pk = 0; - - while (1) { - if (pks[pk] + 1 == pairs) - break; - pk++; - } - - - - gcry_sexp_t *m_sexps = malloc(pairs * sizeof(gcry_sexp_t)); - gcry_sexp_t *s_sexps = malloc(pairs * sizeof(gcry_sexp_t)); - gcry_sexp_t *key_sexps = malloc((pk + 1) * sizeof(gcry_sexp_t)); - - for (int i = 0; i < pairs; i++) { - - m_sexps[i] = sexp_from_string(&b[b_off[i]], "(data (flags raw) (value %m))"); // message format (for comparison) - - s_sexps[i] = sexp_from_string(&s[s_off[i]], "(sig-val (rsa (s %m)))"); // signature format - } - - - for (int i = 0; i <= pk; i++) { - - key_sexps[i] = sexp_from_string_key(&m[m_off[i]], &e[e_off[i]], "(public-key (rsa (n %m) (e %m)))" ); // pub key data - - } - - unsigned long result = 0; - - struct timespec t1, t2; - - printf("VERIFYING %lu SIGNATURES...\n", pairs); - - clock_gettime(CLOCK_REALTIME, &t1); - - pk = 0; // reuse pk - - for (int i = 0; i < pairs; i++) { - - while (1) { - if (pks[pk] >= i) - break; - pk++; - } - - if ( gcry_pk_verify(s_sexps[i], m_sexps[i], key_sexps[pk]) == 0 ) - result += 1; - - } - - clock_gettime(CLOCK_REALTIME, &t2); - - printf("CPU (Reference) verification took %ld.%06ld s\n", ( t2.tv_nsec < t1.tv_nsec ? t2.tv_sec - (t1.tv_sec + 1) : t2.tv_sec - t1.tv_sec ), ( t2.tv_nsec < t1.tv_nsec ? ((999999999 - t1.tv_nsec) + t2.tv_nsec) : (t2.tv_nsec - t1.tv_nsec) ) / 1000); - - if (result == pairs) { - printf("VERIFICATION RESULT: %lu - OK\n\n",result); - } else { - printf("VERIFICATION RESULT: %lu - NOT OK\n\n",result); - } - - - free(b); - free(e); - free(m); - free(s); - - free(b_off); - free(e_off); - free(m_off); - free(s_off); - - free(pks); - - free(m_sexps); - free(s_sexps); - free(key_sexps); - - return result == pairs ? 1 : 0; -} diff --git a/source/reference-test.h b/source/reference-test.h @@ -1,15 +0,0 @@ -// -// reference-test.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 06.12.2023. 
-// - -#ifndef reference_test_h -#define reference_test_h - -#include "util.h" - -int reference_tests(void); - -#endif /* reference_test_h */ diff --git a/source/rsa-test.c b/source/rsa-test.c @@ -1,639 +0,0 @@ -/* - * lib-gpu-verify - * - * This software contains code derived from or inspired by the BigDigit library, - * <http://www.di-mgt.com.au/bigdigits.html> - * which is distributed under the Mozilla Public License, version 2.0. - * - * The original code and modifications made to it are subject to the terms and - * conditions of the Mozilla Public License, version 2.0. A copy of the - * MPL license can be obtained at - * https://www.mozilla.org/en-US/MPL/2.0/. - * - * Changes and additions to the original code are as follows: - * - Copied some functions of the BigDigit library into this file, to convert strings read from files to BigDigit type numbers. - * - * Contributors: - * - Cedric Zwahlen cedric.zwahlen@bfh.ch - * - * Please note that this software is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the Mozilla Public License, version 2.0, for the specific language - * governing permissions and limitations under the License. - */ - -#include "rsa-test.h" - -#define BITS_PER_DIGIT 32 - -#define MAX_ALLOC_SIZE 256 - -#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8) - -typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half? 
- -size_t mpSizeof(const DIGIT_T a[], size_t ndigits) -{ - while(ndigits--) - { - if (a[ndigits] != 0) - return (++ndigits); - } - return 0; -} - -volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits) -{ /* Sets a = 0 */ - - /* Prevent optimiser ignoring this */ - volatile DIGIT_T optdummy; - volatile DIGIT_T *p = a; - - while (ndigits--) - a[ndigits] = 0; - - optdummy = *p; - return optdummy; -} - -size_t uiceil(double x) -/* Returns ceil(x) as a non-negative integer or 0 if x < 0 */ -{ - size_t c; - - if (x < 0) return 0; - c = (size_t)x; - if ((x - c) > 0.0) - c++; - - return c; -} - -volatile uint8_t zeroise_bytes(volatile void *v, size_t n) -{ /* Zeroise byte array b and make sure optimiser does not ignore this */ - volatile uint8_t optdummy; - volatile uint8_t *b = (uint8_t*)v; - while(n--) - b[n] = 0; - optdummy = *b; - return optdummy; -} - -size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes) -/* Converts nbytes octets into big digit a of max size ndigits - Returns actual number of digits set (may be larger than mpSizeof) -*/ -{ - size_t i; - int j, k; - DIGIT_T t; - - mpSetZero(a, ndigits); - //memset(a, 0, ndigits); - - /* Read in octets, least significant first */ - /* i counts into big_d, j along c, and k is # bits to shift */ - for (i = 0, j = (int)nbytes - 1; i < ndigits && j >= 0; i++) - { - t = 0; - for (k = 0; j >= 0 && k < BITS_PER_DIGIT; j--, k += 8) - t |= ((DIGIT_T)c[j]) << k; - a[i] = t; - } - - return i; -} - -size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s) -/* Convert a string in hexadecimal format to a big digit. - Return actual number of digits set (may be larger than mpSizeof). - Just ignores invalid characters in s. 
-*/ -{ - - uint8_t newdigits[MAX_ALLOC_SIZE*2]; - - size_t newlen; - size_t n; - unsigned long t; - size_t i, j; - - mpSetZero(a, ndigits); - //memset(&a, 0, ndigits); - - /* Create some temp storage for int values */ - n = strlen(s); - if (0 == n) return 0; - newlen = uiceil(n * 0.5); /* log(16)/log(256)=0.5 */ - //ALLOC_BYTES(newdigits, newlen); - memset(&newdigits, 0, newlen); - - /* Work through zero-terminated string */ - for (i = 0; s[i]; i++) - { - t = s[i]; - if ((t >= '0') && (t <= '9')) t = (t - '0'); - else if ((t >= 'a') && (t <= 'f')) t = (t - 'a' + 10); - else if ((t >= 'A') && (t <= 'F')) t = (t - 'A' + 10); - else continue; - for (j = newlen; j > 0; j--) - { - t += (unsigned long)newdigits[j-1] << 4; - newdigits[j-1] = (unsigned char)(t & 0xFF); - t >>= 8; - } - } - - /* Convert bytes to big digits */ - n = mpConvFromOctets(a, ndigits, newdigits, newlen); - - memset(&newdigits, 0, newlen); - - return n; -} - -// MARK: OPENCL CODE - -void opencl_pairs_from_files(void *bases, unsigned long *b_len, - void *exponents, unsigned long *e_len, - void *moduli, unsigned long *m_len, - void *signatures, unsigned long *s_len, - unsigned long *pks, - unsigned long *n - ) { - - FILE *pk; - FILE *ms; - - pk = fopen("lib-gpu-generate/publickey.txt", "r"); - ms = fopen("lib-gpu-generate/msgsig.txt", "r"); - - if (pk == NULL || ms == NULL) { - printf("Auxiliary files not found."); - abort(); - } - - fseek (ms, 0, SEEK_END); - long ms_l = ftell(ms); - fseek (ms, 0, SEEK_SET); - char *ms_ptr = malloc(ms_l); - char *ms_ptr_rest = malloc(ms_l); - if (ms_ptr || ms_ptr_rest) - { - fread (ms_ptr, 1, ms_l, ms); - memcpy(ms_ptr_rest, ms_ptr, ms_l); - } - fclose (ms); - - fseek (pk, 0, SEEK_END); - long pk_l = ftell(pk); - fseek (pk, 0, SEEK_SET); - char *pk_ptr = malloc(pk_l); - char *pk_ptr_rest = malloc(pk_l); - if (pk_ptr && pk_ptr_rest) - { - fread (pk_ptr, 1, pk_l, pk); - memcpy(pk_ptr_rest, pk_ptr, pk_l); - } - fclose (pk); - - int i = 0; - int j = 0; - - DIGIT_T 
*bases_t = bases; - DIGIT_T *exponents_t = exponents; - DIGIT_T *moduli_t = moduli; - DIGIT_T *signatures_t = signatures; - - int sz = 2048 / sizeof(DIGIT_T); - - char* message = strtok_r(ms_ptr, "\n", &ms_ptr_rest); - char* signature = strtok_r(0, "\n", &ms_ptr_rest); - char* modulus = strtok_r(pk_ptr, "\n", &pk_ptr_rest); - char* exponent = strtok_r(0, "\n", &pk_ptr_rest); - char* offs = strtok_r(0, "\n", &pk_ptr_rest); - - while (modulus != NULL && exponent != NULL && offs != NULL) { - - pks[i] = atoi(offs); - - unsigned long n_buf_len = strlen(modulus); - unsigned long e_buf_len = strlen(exponent); - - DIGIT_T exponent_dgt [sz*2]; - DIGIT_T modulus_dgt [sz*2]; - - mpSetZero(exponent_dgt, sz*2); - mpSetZero(modulus_dgt, sz*2); - - mpConvFromHex(exponent_dgt, e_buf_len, exponent); - mpConvFromHex(modulus_dgt, n_buf_len, modulus); - - unsigned long max_len = 64; // hardcoded for 2048 bit RSA - - e_len[i] = (i == 0 ? 0 : e_len[i - 1]) + mpSizeof(exponent_dgt, sz*2); - m_len[i] = (i == 0 ? 0 : m_len[i - 1]) + max_len; - - memcpy(&moduli_t[i == 0 ? 0 : (m_len[i - 1])], modulus_dgt, ( m_len[i] - (i == 0 ? 0 : m_len[i - 1]) ) * sizeof(DIGIT_T)); - memcpy(&exponents_t[i == 0 ? 0 : (e_len[i - 1])], exponent_dgt, ( e_len[i] - (i == 0 ? 0 : e_len[i - 1]) ) * sizeof(DIGIT_T)); - - - modulus = strtok_r(0, "\n", &pk_ptr_rest); - exponent = strtok_r(0, "\n", &pk_ptr_rest); - offs = strtok_r(0, "\n", &pk_ptr_rest); - - - i++; - } - - - - while (message != NULL && signature != NULL) { - - unsigned long m_buf_len = strlen(message); - unsigned long s_buf_len = strlen(signature); - - DIGIT_T base_dgt [sz*2]; // temp storage, large enough - DIGIT_T signature_dgt [sz*2]; - - mpSetZero(base_dgt, sz*2); - mpSetZero(signature_dgt, sz*2); - - mpConvFromHex(base_dgt, m_buf_len, message); - mpConvFromHex(signature_dgt, s_buf_len, signature); - - unsigned long max_len = 64; // the maximum of DIGIT_T types we need - - b_len[j] = (j == 0 ? 0 : b_len[j - 1]) + max_len; - s_len[j] = (j == 0 ? 
0 : s_len[j - 1]) + max_len; - - memcpy(&bases_t[j == 0 ? 0 : (b_len[j - 1])], base_dgt, ( b_len[j] - (j == 0 ? 0 : b_len[j - 1]) ) * sizeof(DIGIT_T)); - memcpy(&signatures_t[j == 0 ? 0 : (s_len[j - 1])], signature_dgt, ( s_len[j] - (j == 0 ? 0 : s_len[j - 1]) ) * sizeof(DIGIT_T)); - - message = strtok_r(0, "\n",&ms_ptr_rest); - signature = strtok_r(0, "\n",&ms_ptr_rest); - - j++; - - } - - *n = j; - -} - -int opencl_prepare(struct gpu_info *info, struct gpu_state *state, - void *bases, unsigned long *b_len, - void *exponents, unsigned long *e_len, - void *moduli, unsigned long *m_len, - void *signatures, unsigned long *s_len, - const unsigned long *pks, - const unsigned long n) { - - int err; // error code returned from api calls - - unsigned long pk = 0; - - while (1) { - if (pks[pk] + 1 >= n) - break; - pk++; - } - - - state->mod_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * m_len[pk], NULL, NULL); - state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * e_len[pk], NULL, NULL); - - state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(DIGIT_T) * s_len[n-1], NULL, NULL); - state->comp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(DIGIT_T) * b_len[n-1], NULL, NULL); // the base, to compare whether we get the same signature - - state->mod_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL); - state->exp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1), NULL, NULL); - - state->sig_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); - state->comp_len = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * n, NULL, NULL); - - state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) * (pk + 1),NULL, NULL); - - - - - if (!state->sig_mem || !state->exp_mem || !state->mod_mem || !state->comp_mem || 
!state->invalid ) - { - printf("Error: Failed to allocate device memory!\n"); - exit(1); - } - - // Write our data set into the input array in device memory - // - err = clEnqueueWriteBuffer(info->commands, state->sig_mem, CL_TRUE, 0, sizeof(DIGIT_T) * s_len[n-1], signatures, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->sig_len, CL_TRUE, 0,sizeof(unsigned long) * n, s_len, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0,sizeof(DIGIT_T) * e_len[pk], exponents, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->exp_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), e_len, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->mod_mem, CL_TRUE, 0, sizeof(DIGIT_T) * m_len[pk], moduli, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->mod_len, CL_TRUE, 0,sizeof(unsigned long) * (pk + 1), m_len, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->comp_mem, CL_TRUE, 0, sizeof(DIGIT_T) * b_len[n-1], bases, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(info->commands, state->comp_len, CL_TRUE, 0,sizeof(unsigned long) * n, b_len, 0, NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL); - //err |= clEnqueueWriteBuffer(info->commands, state->valid, CL_TRUE, 0, sizeof(unsigned long), &signature_is_valid, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - - // Set the arguments to our compute kernel - // - err = 0; - err = clSetKernelArg(info->kernel, 0, sizeof(cl_mem), &state->sig_mem); - err |= clSetKernelArg(info->kernel, 1, sizeof(cl_mem), &state->sig_len); - err |= clSetKernelArg(info->kernel, 2, sizeof(cl_mem), &state->exp_mem); - err |= clSetKernelArg(info->kernel, 3, sizeof(cl_mem), &state->exp_len); - err |= clSetKernelArg(info->kernel, 4, sizeof(cl_mem), &state->mod_mem); - err 
|= clSetKernelArg(info->kernel, 5, sizeof(cl_mem), &state->mod_len); - err |= clSetKernelArg(info->kernel, 6, sizeof(cl_mem), &state->comp_mem); - err |= clSetKernelArg(info->kernel, 7, sizeof(cl_mem), &state->comp_len); - //err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->valid); -> set somewhere else - err |= clSetKernelArg(info->kernel, 9, sizeof(cl_mem), &state->pks_indices); - err |= clSetKernelArg(info->kernel, 10, sizeof(unsigned long), &n); - - - //err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); - if (err != CL_SUCCESS) - { - printf("RSA-Error: Failed to set kernel arguments! %d\n", err); - exit(1); - } - - state->total = n; - - - return 0; - -} - -int opencl_exec_kernel(struct gpu_info *info, struct gpu_state *state) { - - size_t global; - int err; - - global = state->total; - - // measure from the first call to the kernel... - if (state->skip) { - state->skip = false; - clock_gettime(CLOCK_REALTIME, &state->t1); - } - - err = clEnqueueNDRangeKernel(info->commands, info->kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - if (err) - { - printf("Error: Failed to execute kernel!\n"); - return EXIT_FAILURE; - } - - //printf("KERNEL IS EXECUTING...\n"); - - return 0; - -} - - -unsigned long opencl_results(struct gpu_info *info, struct gpu_state *state, bool timed) { - - if (state->skip) { - // reset skip in the kernel execution - return 0; - } - - int err; - - unsigned long failed_signatures = 0; - - - // Wait for the command commands to get serviced before reading back results - // - err = clFinish(info->commands); - if (err != CL_SUCCESS) - { - printf("Error: Kernel failure! 
%d\n", err); - exit(1); - } - - // Read back the results from the device to verify the output - // - //err = clEnqueueReadBuffer( commands, res_mem, CL_TRUE, 0, res_len, res_buf, 0, NULL, NULL ); - err = clEnqueueReadBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - printf("Error: Failed to read output array! %d\n", err); - exit(1); - } - - - - - - - if (timed) { - // stop measuring after the last command has been read - clock_gettime(CLOCK_REALTIME, &state->t2); - - printf("GPU verification took %ld.%06ld s\n", ( state->t2.tv_nsec < state->t1.tv_nsec ? state->t2.tv_sec - (state->t1.tv_sec + 1) : state->t2.tv_sec - state->t1.tv_sec ), ( state->t2.tv_nsec < state->t1.tv_nsec ? ((999999999 - state->t1.tv_nsec) + state->t2.tv_nsec) : (state->t2.tv_nsec - state->t1.tv_nsec) ) / 1000); - - } - - - return state->total - failed_signatures; - -} - -void opencl_cleanup(struct gpu_info *info) { - - clReleaseProgram(info->program); - clReleaseKernel(info->kernel); - clReleaseCommandQueue(info->commands); - clReleaseContext(info->context); - -} - -void opencl_release(struct gpu_state *state) { - - clReleaseMemObject(state->comp_mem); - clReleaseMemObject(state->exp_mem); - clReleaseMemObject(state->mod_mem); - clReleaseMemObject(state->sig_mem); - - clReleaseMemObject(state->comp_len); - clReleaseMemObject(state->exp_len); - clReleaseMemObject(state->mod_len); - clReleaseMemObject(state->sig_len); - - clReleaseMemObject(state->pks_indices); - - - -} - -// MARK: for library - -void gpu_init(struct gpu_info *info, struct gpu_state *state) { - - info->platform = select_platform(0, false); - info->device_id = select_device (info->platform); - info->context = create_compute_context (info->device_id); - info->commands = create_command_queue (info->device_id, info->context); - info->program = compile_program (info->device_id, info->context, "verify.cl"); - info->kernel = create_kernel 
(info->program, "several"); - - state->result = 0; - state->total = 0; - state->skip = true; - - - int err = 0; - - int failed_signatures = 0; - - state->invalid = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sizeof(unsigned long) ,NULL, NULL); - - err |= clEnqueueWriteBuffer(info->commands, state->invalid, CL_TRUE, 0, sizeof(unsigned long), &failed_signatures, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - printf("Error: Failed to write to source array!\n"); - exit(1); - } - - err |= clSetKernelArg(info->kernel, 8, sizeof(cl_mem), &state->invalid); - if (err != CL_SUCCESS) - { - printf("Error: Failed to set kernel arguments! %d\n", err); - exit(1); - } -} - -void gpu_execute(struct gpu_info *info, - struct gpu_state *state, - void *bases, unsigned long *b_len, - void *exponents, unsigned long *e_len, - void *moduli, unsigned long *m_len, - void *signatures, unsigned long *s_len, - const unsigned long *pks, - const unsigned long n) { - - opencl_prepare(info, state, bases, b_len, - exponents, e_len, - moduli, m_len, - signatures, s_len, - pks, n); // prepares the next batch of signatures on CPU, naturally blocks until it's finished - state->result += opencl_results(info, state, false); // waits for kernel, if it is not ready yet - opencl_exec_kernel(info,state); // start kernel (returns immediately) - opencl_release(state); // release buffers - - - -} - -unsigned long gpu_finish(struct gpu_info *info, struct gpu_state *state) { - - state->result += opencl_results(info,state,true); - - opencl_cleanup(info); - clReleaseMemObject(state->invalid); - - unsigned long res = state->result; - - state->result = 0; // reset result - - return res; - -} - -// MARK: function to know how much storage the gpu has to split data - - - -//size_t retSize_3 = sizeof(cl_ulong); -//cl_ulong max_stor = 0; -//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, 0, NULL, &retSize_3); -//clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, retSize_3, &max_stor, &retSize_3); - 
-//printf("max memory: %llu\n", max_stor); - - -int rsa_tests(void) { - - //setup_gcry(); - - unsigned long pairs = number_of_pairs(); // returns an estimation of pairs - - unsigned long digit_sz = 64 * pairs * sizeof(DIGIT_T); - unsigned long arr_sz = pairs * sizeof(unsigned long); - - DIGIT_T *q = malloc(digit_sz); - DIGIT_T *r = malloc(digit_sz); - DIGIT_T *s = malloc(digit_sz); - DIGIT_T *t = malloc(digit_sz); - - memset(q, 0, digit_sz); - memset(r, 0, digit_sz); - memset(s, 0, digit_sz); - memset(t, 0, digit_sz); - - unsigned long *u = malloc(arr_sz); - unsigned long *v = malloc(arr_sz); - unsigned long *w = malloc(arr_sz); - unsigned long *x = malloc(arr_sz); - - unsigned long *pks = malloc(arr_sz); - - memset(u, 0, arr_sz); - memset(v, 0, arr_sz); - memset(w, 0, arr_sz); - memset(x, 0, arr_sz); - memset(pks, 0, arr_sz); - - printf("READING KEYS...\n"); - - opencl_pairs_from_files(q, u, - r, v, - s, w, - t, x, pks, &pairs); // this returns the actual amount of pairs - - - - printf("VERIFYING %lu SIGNATURES...\n", pairs); - - struct gpu_info info; - struct gpu_state state; - - gpu_init(&info, &state); - - gpu_execute(&info, &state, q, u, r, v, s, w, t, x, pks, pairs); - - unsigned long res = gpu_finish(&info, &state); - - if (res == pairs) { - printf("VERIFICATION RESULT: %lu - OK\n\n",res); - } else { - printf("VERIFICATION RESULT: %lu - NOT OK\n\n",res); - } - - - free(q); - free(r); - free(s); - free(t); - - free(u); - free(v); - free(w); - free(x); - - free(pks); - - return 0; -} - diff --git a/source/rsa-test.h b/source/rsa-test.h @@ -1,33 +0,0 @@ -// -// rsa-test.h -// lib-gpu-verify -// -// Created by Cedric Zwahlen on 28.09.2023. 
-// - -#ifndef rsa_test_h -#define rsa_test_h - -#include "util.h" - -#include "ctype.h" - - -int rsa_tests(void); - -// MARK: put in seperate file, and rename this one - -void gpu_init(struct gpu_info *info, struct gpu_state *state); - -void gpu_execute(struct gpu_info *info, - struct gpu_state *state, - void *bases, unsigned long *b_len, - void *exponents, unsigned long *e_len, - void *moduli, unsigned long *m_len, - void *signatures, unsigned long *s_len, - const unsigned long *pks, - const unsigned long n); - -unsigned long gpu_finish(struct gpu_info *info, struct gpu_state *state); - -#endif /* rsa_test_h */ diff --git a/source/util.c b/source/util.c @@ -7,7 +7,7 @@ #include "util.h" -unsigned long number_of_pairs(void) { +unsigned long gpuv_estimate_pairs(void) { struct stat ss; @@ -34,7 +34,7 @@ unsigned long number_of_pairs(void) { } -void setup_gcry(void) { +void gpuv_prepare_gcry(void) { /* Version check should be the very first call because it diff --git a/source/util.h b/source/util.h @@ -28,16 +28,11 @@ #include <CL/opencl.h> #endif - - - - #define NEED_LIBGCRYPT_VERSION "1.9.4" -unsigned long number_of_pairs(void); - -void setup_gcry(void); +unsigned long gpuv_estimate_pairs(void); +void gpuv_prepare_gcry(void); cl_platform_id select_platform (unsigned int offset, bool print_platforms); diff --git a/xcode/.DS_Store b/xcode/.DS_Store Binary files differ. 
diff --git a/xcode/montmodmult.cl b/xcode/gpuv-montg.cl diff --git a/xcode/verify.cl b/xcode/gpuv.cl diff --git a/xcode/lib-gpu-generate/main.c b/xcode/lib-gpu-generate/main.c @@ -16,7 +16,7 @@ #define NEED_LIBGCRYPT_VERSION "1.10.1" -void setup_gcry(void) { +void gpuv_prepare_gcry(void) { gcry_control (GCRYCTL_SET_THREAD_CBS, 0); @@ -198,7 +198,7 @@ int main(int argc, const char * argv[]) { printf("generating %lu signatures with %i keys.\n",n * pks, pks); } - setup_gcry(); + gpuv_prepare_gcry(); pthread_t tid[pks]; // not the best, but it is safe diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj @@ -7,15 +7,14 @@ objects = { /* Begin PBXBuildFile section */ - 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A36F8882B0F938E00AB772D /* montgomery.cl */; }; - 6A8A795F2A89672700116D7D /* verify.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* verify.cl */; }; + 6A8A795F2A89672700116D7D /* gpuv.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* gpuv.cl */; }; 6AA38E5B2B0A97FC00E85243 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AA38E5A2B0A97FC00E85243 /* main.c */; }; 6ABC2E842B231DFF00033B90 /* util.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E832B231DFF00033B90 /* util.c */; }; - 6ABC2E882B231E3D00033B90 /* reference-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E862B231E3D00033B90 /* reference-test.c */; }; - 6AC553252B2E174900046AB7 /* montmodmult.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553242B2E174900046AB7 /* montmodmult.cl */; }; - 6AC553292B2E17C800046AB7 /* montmodmult.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553282B2E17C800046AB7 /* montmodmult.c */; }; + 6ABC2E882B231E3D00033B90 /* gpuv-ref.c in Sources */ = {isa = PBXBuildFile; fileRef = 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */; }; + 6AC553252B2E174900046AB7 /* gpuv-montg.cl in Sources */ = 
{isa = PBXBuildFile; fileRef = 6AC553242B2E174900046AB7 /* gpuv-montg.cl */; }; + 6AC553292B2E17C800046AB7 /* gpuv-montg.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AC553282B2E17C800046AB7 /* gpuv-montg.c */; }; 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */; }; - 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* rsa-test.c */; }; + 6AF748832ADADF4500D58E08 /* gpuv.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* gpuv.c */; }; C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C3770EFC0E6F1138009A5A77 /* OpenCL.framework */; }; /* End PBXBuildFile section */ @@ -42,27 +41,20 @@ /* Begin PBXFileReference section */ 466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-verify"; sourceTree = BUILT_PRODUCTS_DIR; }; - 6A36F8882B0F938E00AB772D /* montgomery.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = montgomery.cl; sourceTree = "<group>"; }; - 6A7914CC2B0CF320001EDCC1 /* montgomery.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = montgomery.h; path = ../source/montgomery.h; sourceTree = "<group>"; }; - 6A7914CD2B0CF320001EDCC1 /* montgomery.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = montgomery.c; path = ../source/montgomery.c; sourceTree = "<group>"; }; - 6A8A795E2A89672700116D7D /* verify.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = verify.cl; sourceTree = "<group>"; }; + 6A8A795E2A89672700116D7D /* gpuv.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = gpuv.cl; sourceTree = "<group>"; }; 6AA38E582B0A97FC00E85243 /* lib-gpu-generate */ = {isa = 
PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-generate"; sourceTree = BUILT_PRODUCTS_DIR; }; 6AA38E5A2B0A97FC00E85243 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; }; 6AA38E612B0A9B2100E85243 /* lib-gpu-generate.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = "lib-gpu-generate.entitlements"; sourceTree = "<group>"; }; - 6AB4D99B2B1645F900A686F2 /* montgomery-test.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "montgomery-test.h"; path = "../source/montgomery-test.h"; sourceTree = "<group>"; }; - 6AB4D99C2B1645F900A686F2 /* montgomery-test.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "montgomery-test.c"; path = "../source/montgomery-test.c"; sourceTree = "<group>"; }; 6ABC2E832B231DFF00033B90 /* util.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = util.c; path = ../source/util.c; sourceTree = "<group>"; }; 6ABC2E852B231E0400033B90 /* util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = util.h; path = ../source/util.h; sourceTree = "<group>"; }; - 6ABC2E862B231E3D00033B90 /* reference-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "reference-test.c"; path = "../source/reference-test.c"; sourceTree = "<group>"; }; - 6ABC2E872B231E3D00033B90 /* reference-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "reference-test.h"; path = "../source/reference-test.h"; sourceTree = "<group>"; }; - 6AC553242B2E174900046AB7 /* montmodmult.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = montmodmult.cl; sourceTree = "<group>"; }; - 6AC553272B2E17C800046AB7 /* montmodmult.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = 
montmodmult.h; path = ../source/montmodmult.h; sourceTree = "<group>"; }; - 6AC553282B2E17C800046AB7 /* montmodmult.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = montmodmult.c; path = ../source/montmodmult.c; sourceTree = "<group>"; }; - 6AC5532A2B2E885200046AB7 /* gmp.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gmp.c; path = ../source/gmp.c; sourceTree = "<group>"; }; - 6AC5532C2B2E889100046AB7 /* gmp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = gmp.h; path = ../source/gmp.h; sourceTree = "<group>"; }; + 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "gpuv-ref.c"; path = "../source/gpuv-ref.c"; sourceTree = "<group>"; }; + 6ABC2E872B231E3D00033B90 /* gpuv-ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "gpuv-ref.h"; path = "../source/gpuv-ref.h"; sourceTree = "<group>"; }; + 6AC553242B2E174900046AB7 /* gpuv-montg.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = "gpuv-montg.cl"; sourceTree = "<group>"; }; + 6AC553272B2E17C800046AB7 /* gpuv-montg.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "gpuv-montg.h"; path = "../source/gpuv-montg.h"; sourceTree = "<group>"; }; + 6AC553282B2E17C800046AB7 /* gpuv-montg.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "gpuv-montg.c"; path = "../source/gpuv-montg.c"; sourceTree = "<group>"; }; 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "lib-gpu-verify.c"; path = "../source/lib-gpu-verify.c"; sourceTree = "<group>"; }; - 6AF7487F2ADADF4500D58E08 /* rsa-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "rsa-test.c"; path = "../source/rsa-test.c"; sourceTree = "<group>"; }; - 
6AF748802ADADF4500D58E08 /* rsa-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "rsa-test.h"; path = "../source/rsa-test.h"; sourceTree = "<group>"; }; + 6AF7487F2ADADF4500D58E08 /* gpuv.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = gpuv.c; path = ../source/gpuv.c; sourceTree = "<group>"; }; + 6AF748802ADADF4500D58E08 /* gpuv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gpuv.h; path = ../source/gpuv.h; sourceTree = "<group>"; }; C3770EFC0E6F1138009A5A77 /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; }; /* End PBXFileReference section */ @@ -107,12 +99,10 @@ 6A984F162AC5B18A00F530FD /* Headers */ = { isa = PBXGroup; children = ( - 6AF748802ADADF4500D58E08 /* rsa-test.h */, - 6A7914CC2B0CF320001EDCC1 /* montgomery.h */, - 6AC5532C2B2E889100046AB7 /* gmp.h */, + 6AF748802ADADF4500D58E08 /* gpuv.h */, 6ABC2E852B231E0400033B90 /* util.h */, - 6ABC2E872B231E3D00033B90 /* reference-test.h */, - 6AB4D99B2B1645F900A686F2 /* montgomery-test.h */, + 6ABC2E872B231E3D00033B90 /* gpuv-ref.h */, + 6AC553272B2E17C800046AB7 /* gpuv-montg.h */, ); name = Headers; sourceTree = "<group>"; @@ -126,30 +116,17 @@ path = "lib-gpu-generate"; sourceTree = "<group>"; }; - 6AC553262B2E175500046AB7 /* montmodmult */ = { - isa = PBXGroup; - children = ( - 6AC553242B2E174900046AB7 /* montmodmult.cl */, - 6AC553272B2E17C800046AB7 /* montmodmult.h */, - 6AC553282B2E17C800046AB7 /* montmodmult.c */, - ); - name = montmodmult; - sourceTree = "<group>"; - }; C3770EF10E6F10BB009A5A77 /* Sources */ = { isa = PBXGroup; children = ( 6A984F162AC5B18A00F530FD /* Headers */, - 6A8A795E2A89672700116D7D /* verify.cl */, - 6A36F8882B0F938E00AB772D /* montgomery.cl */, - 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */, - 
6AF7487F2ADADF4500D58E08 /* rsa-test.c */, - 6AC5532A2B2E885200046AB7 /* gmp.c */, - 6A7914CD2B0CF320001EDCC1 /* montgomery.c */, - 6AB4D99C2B1645F900A686F2 /* montgomery-test.c */, + 6A8A795E2A89672700116D7D /* gpuv.cl */, + 6AC553242B2E174900046AB7 /* gpuv-montg.cl */, 6ABC2E832B231DFF00033B90 /* util.c */, - 6ABC2E862B231E3D00033B90 /* reference-test.c */, - 6AC553262B2E175500046AB7 /* montmodmult */, + 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */, + 6AF7487F2ADADF4500D58E08 /* gpuv.c */, + 6ABC2E862B231E3D00033B90 /* gpuv-ref.c */, + 6AC553282B2E17C800046AB7 /* gpuv-montg.c */, ); name = Sources; sourceTree = "<group>"; @@ -239,14 +216,13 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6ABC2E882B231E3D00033B90 /* reference-test.c in Sources */, - 6AC553252B2E174900046AB7 /* montmodmult.cl in Sources */, - 6AC553292B2E17C800046AB7 /* montmodmult.c in Sources */, + 6ABC2E882B231E3D00033B90 /* gpuv-ref.c in Sources */, + 6AC553252B2E174900046AB7 /* gpuv-montg.cl in Sources */, + 6AC553292B2E17C800046AB7 /* gpuv-montg.c in Sources */, 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */, - 6A8A795F2A89672700116D7D /* verify.cl in Sources */, + 6A8A795F2A89672700116D7D /* gpuv.cl in Sources */, 6ABC2E842B231DFF00033B90 /* util.c in Sources */, - 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */, - 6A36F8892B0F938E00AB772D /* montgomery.cl in Sources */, + 6AF748832ADADF4500D58E08 /* gpuv.c in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate Binary files differ. 
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -1199,9 +1199,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "607" - endingLineNumber = "607" - landmarkName = "rsa_tests()" + startingLineNumber = "594" + endingLineNumber = "594" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -1427,9 +1427,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "631" - endingLineNumber = "631" - landmarkName = "rsa_tests()" + startingLineNumber = "618" + endingLineNumber = "618" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -1520,9 +1520,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "633" - endingLineNumber = "633" - landmarkName = "rsa_tests()" + startingLineNumber = "620" + endingLineNumber = "620" + landmarkName = "gpuv_test()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -2518,9 +2518,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "638" - endingLineNumber = "638" - landmarkName = "rsa_tests()" + startingLineNumber = "625" + endingLineNumber = "625" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -2626,9 +2626,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "570" - endingLineNumber = "570" - landmarkName = "rsa_tests()" + startingLineNumber = "557" + endingLineNumber = "557" + landmarkName = 
"gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -2721,9 +2721,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "636" - endingLineNumber = "636" - landmarkName = "rsa_tests()" + startingLineNumber = "623" + endingLineNumber = "623" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -2784,9 +2784,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "604" - endingLineNumber = "604" - landmarkName = "rsa_tests()" + startingLineNumber = "591" + endingLineNumber = "591" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -2847,9 +2847,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "596" - endingLineNumber = "596" - landmarkName = "rsa_tests()" + startingLineNumber = "583" + endingLineNumber = "583" + landmarkName = "gpuv_test()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -2863,9 +2863,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "629" - endingLineNumber = "629" - landmarkName = "rsa_tests()" + startingLineNumber = "616" + endingLineNumber = "616" + landmarkName = "gpuv_test()" landmarkType = "9"> <Locations> <Location @@ -2911,9 +2911,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "583" - endingLineNumber = "583" - landmarkName = "rsa_tests()" + startingLineNumber = "570" + endingLineNumber = "570" + landmarkName = "gpuv_test()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -2927,9 +2927,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" 
endingColumnNumber = "9223372036854775807" - startingLineNumber = "624" - endingLineNumber = "624" - landmarkName = "rsa_tests()" + startingLineNumber = "611" + endingLineNumber = "611" + landmarkName = "gpuv_test()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -2943,8 +2943,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "374" - endingLineNumber = "374" + startingLineNumber = "370" + endingLineNumber = "370" landmarkName = "opencl_prepare(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3007,9 +3007,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "618" - endingLineNumber = "618" - landmarkName = "rsa_tests()" + startingLineNumber = "605" + endingLineNumber = "605" + landmarkName = "gpuv_test()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -3023,9 +3023,9 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "531" - endingLineNumber = "531" - landmarkName = "gpu_execute(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)" + startingLineNumber = "520" + endingLineNumber = "520" + landmarkName = "gpuv_execute(info, state, bases, b_len, exponents, e_len, moduli, m_len, signatures, s_len, pks, n)" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -3039,8 +3039,8 @@ filePath = "../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "449" - endingLineNumber = "449" + startingLineNumber = "438" + endingLineNumber = "438" landmarkName = "opencl_results(info, state, timed)" landmarkType = "9"> <Locations> @@ -3147,8 +3147,8 @@ filePath = 
"../source/rsa-test.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "390" - endingLineNumber = "390" + startingLineNumber = "386" + endingLineNumber = "386" landmarkName = "opencl_exec_kernel(info, state)" landmarkType = "9"> </BreakpointContent> @@ -3163,8 +3163,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "22" - endingLineNumber = "22" + startingLineNumber = "21" + endingLineNumber = "21" landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3179,8 +3179,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "93" - endingLineNumber = "93" + startingLineNumber = "91" + endingLineNumber = "91" landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -3227,9 +3227,9 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "596" - endingLineNumber = "596" - landmarkName = "mont_modmult_tests()" + startingLineNumber = "591" + endingLineNumber = "591" + landmarkName = "gpuv_test_montg()" landmarkType = "9"> <Locations> <Location @@ -3275,9 +3275,9 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "646" - endingLineNumber = "646" - landmarkName = "mont_modmult_tests()" + startingLineNumber = "641" + endingLineNumber = "641" + landmarkName = "gpuv_test_montg()" landmarkType = "9"> <Locations> <Location @@ -3398,8 +3398,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" 
endingColumnNumber = "9223372036854775807" - startingLineNumber = "350" - endingLineNumber = "350" + startingLineNumber = "346" + endingLineNumber = "346" landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -3446,8 +3446,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "340" - endingLineNumber = "340" + startingLineNumber = "336" + endingLineNumber = "336" landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3462,8 +3462,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "477" - endingLineNumber = "477" + startingLineNumber = "473" + endingLineNumber = "473" landmarkName = "modmult_opencl_results(info, state, timed, msg_buf, r_1_buf, n_buf, s_buf, n)" landmarkType = "9"> <Locations> @@ -3525,8 +3525,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "352" - endingLineNumber = "352" + startingLineNumber = "348" + endingLineNumber = "348" landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -3573,8 +3573,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "428" - endingLineNumber = "428" + startingLineNumber = "424" + endingLineNumber = "424" landmarkName = "modmult_opencl_exec_kernel(info, state)" landmarkType = "9"> <Locations> @@ -3621,8 +3621,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" 
endingColumnNumber = "9223372036854775807" - startingLineNumber = "425" - endingLineNumber = "425" + startingLineNumber = "421" + endingLineNumber = "421" landmarkName = "modmult_opencl_exec_kernel(info, state)" landmarkType = "9"> <Locations> @@ -3714,8 +3714,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "73" - endingLineNumber = "73" + startingLineNumber = "71" + endingLineNumber = "71" landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3730,9 +3730,9 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "571" - endingLineNumber = "571" - landmarkName = "modmult_gpu_execute(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" + startingLineNumber = "566" + endingLineNumber = "566" + landmarkName = "gpuv_execute_montg(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> <Location @@ -3778,8 +3778,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "372" - endingLineNumber = "372" + startingLineNumber = "368" + endingLineNumber = "368" landmarkName = "modmult_opencl_prepare(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3794,8 +3794,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "460" - endingLineNumber = "460" + startingLineNumber = "456" + endingLineNumber = "456" landmarkName = "modmult_opencl_results(info, state, timed, msg_buf, r_1_buf, n_buf, s_buf, n)" landmarkType = 
"9"> <Locations> @@ -3903,8 +3903,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "89" - endingLineNumber = "89" + startingLineNumber = "87" + endingLineNumber = "87" landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3919,8 +3919,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "82" - endingLineNumber = "82" + startingLineNumber = "80" + endingLineNumber = "80" landmarkName = "montmodmult_pairs_from_files(x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -3935,9 +3935,9 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "639" - endingLineNumber = "639" - landmarkName = "mont_modmult_tests()" + startingLineNumber = "634" + endingLineNumber = "634" + landmarkName = "gpuv_test_montg()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -3951,9 +3951,9 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "600" - endingLineNumber = "600" - landmarkName = "mont_modmult_tests()" + startingLineNumber = "595" + endingLineNumber = "595" + landmarkName = "gpuv_test_montg()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -3969,7 +3969,7 @@ endingColumnNumber = "9223372036854775807" startingLineNumber = "33" endingLineNumber = "33" - landmarkName = "number_of_pairs()" + landmarkName = "gpuv_estimate_pairs()" landmarkType = "9"> </BreakpointContent> </BreakpointProxy> @@ -3983,8 +3983,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" 
endingColumnNumber = "9223372036854775807" - startingLineNumber = "421" - endingLineNumber = "421" + startingLineNumber = "417" + endingLineNumber = "417" landmarkName = "modmult_opencl_exec_kernel(info, state)" landmarkType = "9"> </BreakpointContent> @@ -3993,14 +3993,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "2E10F90D-6276-4B90-B2B1-8111E48FD074" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "318" - endingLineNumber = "318" + startingLineNumber = "315" + endingLineNumber = "315" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4154,6 +4154,21 @@ endingLineNumber = "312" offsetFromSymbolStart = "940"> </Location> + <Location + uuid = "2E10F90D-6276-4B90-B2B1-8111E48FD074 - 6fe7da94a4c07cf2" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "modmult_opencl_convert" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/montmodmult.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "315" + endingLineNumber = "315" + offsetFromSymbolStart = "954"> + </Location> </Locations> </BreakpointContent> </BreakpointProxy> @@ -4167,8 +4182,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "270" - endingLineNumber = "270" + startingLineNumber = "267" + endingLineNumber = "267" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> 
</BreakpointContent> @@ -4183,8 +4198,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "283" - endingLineNumber = "283" + startingLineNumber = "280" + endingLineNumber = "280" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4231,8 +4246,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "290" - endingLineNumber = "290" + startingLineNumber = "287" + endingLineNumber = "287" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -4241,14 +4256,14 @@ BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> <BreakpointContent uuid = "2A4E0B7E-B255-4271-B259-62B34ABE9D3E" - shouldBeEnabled = "Yes" + shouldBeEnabled = "No" ignoreCount = "0" continueAfterRunningActions = "No" filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "293" - endingLineNumber = "293" + startingLineNumber = "290" + endingLineNumber = "290" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4357,6 +4372,21 @@ endingLineNumber = "288" offsetFromSymbolStart = "477"> </Location> + <Location + uuid = "2A4E0B7E-B255-4271-B259-62B34ABE9D3E - 6fe7da94a4c07bca" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "modmult_opencl_convert" + moduleName = "lib-gpu-verify" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/libgpuverify/source/montmodmult.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = 
"9223372036854775807" + startingLineNumber = "291" + endingLineNumber = "291" + offsetFromSymbolStart = "491"> + </Location> </Locations> </BreakpointContent> </BreakpointProxy> @@ -4370,8 +4400,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "310" - endingLineNumber = "310" + startingLineNumber = "307" + endingLineNumber = "307" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4418,8 +4448,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "305" - endingLineNumber = "305" + startingLineNumber = "302" + endingLineNumber = "302" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> </BreakpointContent> @@ -4434,8 +4464,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "311" - endingLineNumber = "311" + startingLineNumber = "308" + endingLineNumber = "308" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4482,8 +4512,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "312" - endingLineNumber = "312" + startingLineNumber = "309" + endingLineNumber = "309" landmarkName = "modmult_opencl_convert(state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, s_buf, pks, n)" landmarkType = "9"> <Locations> @@ -4530,8 +4560,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "155" 
- endingLineNumber = "155" + startingLineNumber = "153" + endingLineNumber = "153" landmarkName = "convert_thread(vargp)" landmarkType = "9"> <Locations> @@ -4578,8 +4608,8 @@ filePath = "../source/montmodmult.c" startingColumnNumber = "9223372036854775807" endingColumnNumber = "9223372036854775807" - startingLineNumber = "194" - endingLineNumber = "194" + startingLineNumber = "192" + endingLineNumber = "192" landmarkName = "convert_thread(vargp)" landmarkType = "9"> </BreakpointContent> diff --git a/xcode/montgomery.cl b/xcode/montgomery.cl @@ -1,2954 +0,0 @@ -/* this kernel contains code of the mini-gmp, a minimalistic implementation of a GNU GMP subset. - - Contributed to the GNU project by Niels Möller - Additional functionalities and improvements by Marco Bodrato. - - Changes and additions for this kernel by Cedric Zwahlen - -Copyright 1991-1997, 1999-2022 Free Software Foundation, Inc. - -This file contains code that is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
- - */ - - -#ifndef MINI_GMP_LIMB_TYPE -#define MINI_GMP_LIMB_TYPE long -#endif - - -#define ULONG_MAX_gpu 0xFFFFFFFFUL - -#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) - -#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0) -#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) - -#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) -#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) - -#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) -#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) - -#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) -#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) - -#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) -#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) - -#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b))) - -#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \ - ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) - - -#define gmp_clz(count, x) do { \ - mp_limb_t __clz_x = (x); \ - unsigned __clz_c = 0; \ - int LOCAL_SHIFT_BITS = 8; \ - if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \ - for (; \ - (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \ - __clz_c += 8) \ - { __clz_x <<= LOCAL_SHIFT_BITS; } \ - for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \ - __clz_x <<= 1; \ - (count) = __clz_c; \ - } while (0) - -#define gmp_umullo_limb(u, v) \ - ((sizeof(mp_limb_t) >= sizeof(int)) ? 
(u)*(v) : (unsigned int)(u) * (v)) - -#define gmp_umul_ppmm(w1, w0, u, v) \ - do { \ - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \ - if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned int __ww = (unsigned int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \ - { \ - unsigned long int __ww = (unsigned long int) (u) * (v); \ - w0 = (mp_limb_t) __ww; \ - w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \ - } \ - else { \ - mp_limb_t __x0, __x1, __x2, __x3; \ - unsigned __ul, __vl, __uh, __vh; \ - mp_limb_t __u = (u), __v = (v); \ - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); \ - \ - __ul = __u & GMP_LLIMB_MASK; \ - __uh = __u >> (GMP_LIMB_BITS / 2); \ - __vl = __v & GMP_LLIMB_MASK; \ - __vh = __v >> (GMP_LIMB_BITS / 2); \ - \ - __x0 = (mp_limb_t) __ul * __vl; \ - __x1 = (mp_limb_t) __ul * __vh; \ - __x2 = (mp_limb_t) __uh * __vl; \ - __x3 = (mp_limb_t) __uh * __vh; \ - \ - __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. 
*/ \ - \ - (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \ - (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \ - } \ - } while (0) - -#define gmp_assert_nocarry(x) do { \ - mp_limb_t __cy = (x); \ - assert (__cy == 0); \ - (void) (__cy); \ - } while (0) - -#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) + (bl); \ - (sh) = (ah) + (bh) + (__x < (al)); \ - (sl) = __x; \ - } while (0) - -#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \ - do { \ - mp_limb_t __x; \ - __x = (al) - (bl); \ - (sh) = (ah) - (bh) - ((al) < (bl)); \ - (sl) = __x; \ - } while (0) - - -#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ - do { \ - mp_limb_t _qh, _ql, _r, _mask; \ - gmp_umul_ppmm (_qh, _ql, (nh), (di)); \ - gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ - _r = (nl) - gmp_umullo_limb (_qh, (d)); \ - _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ - _qh += _mask; \ - _r += _mask & (d); \ - if (_r >= (d)) \ - { \ - _r -= (d); \ - _qh++; \ - } \ - \ - (r) = _r; \ - (q) = _qh; \ - } while (0) - -#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ - do { \ - mp_limb_t _q0, _t1, _t0, _mask; \ - gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \ - gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ - \ - /* Compute the two most significant limbs of n - q'd */ \ - (r1) = (n1) - gmp_umullo_limb ((d1), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ - gmp_umul_ppmm (_t1, _t0, (d0), (q)); \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ - (q)++; \ - \ - /* Conditionally adjust q and the remainders */ \ - _mask = - (mp_limb_t) ((r1) >= _q0); \ - (q) += _mask; \ - gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ - if ((r1) >= (d1)) \ - { \ - if ((r1) > (d1) || (r0) >= (d0)) \ - { \ - (q)++; \ - gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ - } \ - } \ - } while (0) - -#define gmp_ctz(count, x) do { \ - mp_limb_t __ctz_x = (x); \ - unsigned __ctz_c = 0; \ - 
gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \ - (count) = GMP_LIMB_BITS - 1 - __ctz_c; \ - } while (0) - - -#define MPZ_SRCPTR_SWAP(x, y) \ - do { \ - mpz_srcptr __mpz_srcptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_srcptr_swap__tmp; \ - } while (0) - -#define MP_SIZE_T_SWAP(x, y) \ - do { \ - mp_size_t __mp_size_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_size_t_swap__tmp; \ - } while (0) - -#define MPZ_PTR_SWAP(x, y) \ - do { \ - mpz_ptr __mpz_ptr_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mpz_ptr_swap__tmp; \ - } while (0) - -#define MP_BITCNT_T_SWAP(x,y) \ - do { \ - mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \ - (x) = (y); \ - (y) = __mp_bitcnt_t_swap__tmp; \ - } while (0) - - -#define assert(x){if((x)==0){printf((char __constant *)"assert reached\n");}} - -#define NULL ((void*)0) - -typedef unsigned MINI_GMP_LIMB_TYPE mp_limb_t; -typedef long mp_size_t; -typedef unsigned long mp_bitcnt_t; - -typedef mp_limb_t *mp_ptr; -typedef const mp_limb_t *mp_srcptr; - -typedef struct -{ - int _mp_alloc; /* Number of *limbs* allocated and pointed - to by the _mp_d field. */ - int _mp_size; /* abs(_mp_size) is the number of limbs the - last field points to. If _mp_size is - negative this is a negative number. */ - //mp_limb_t *_mp_d; /* Pointer to the limbs. */ - - mp_limb_t _mp_d[256]; - -} __mpz_struct; - -typedef __mpz_struct mpz_t[1]; - -typedef __mpz_struct *mpz_ptr; - -typedef const __mpz_struct *mpz_srcptr; - -struct gmp_div_inverse -{ - /* Normalization shift count. */ - unsigned shift; - /* Normalized divisor (d0 unused for mpn_div_qr_1) */ - mp_limb_t d1, d0; - /* Inverse, for 2/1 or 3/2. */ - mp_limb_t di; -}; - - -struct mpn_base_info -{ - /* bb is the largest power of the base which fits in one limb, and - exp is the corresponding exponent. 
*/ - unsigned exp; - mp_limb_t bb; -}; - - -enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC }; - -void mpz_init (mpz_t r); -void mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n); -void mpz_set (mpz_t r, const mpz_t x); -void -mpz_set (mpz_t r, const mpz_t x); -void -mpz_set_ui (mpz_t r, unsigned long int x); -void -mpz_set_si (mpz_t r, signed long int x); -void -mpz_init_set_si (mpz_t r, signed long int x); -void -mpz_init_set (mpz_t r, const mpz_t x); -void -mpz_init2 (mpz_t r, mp_bitcnt_t bits); -void -mpz_init_set_ui (mpz_t r, unsigned long int x); -void -mpz_clear (mpz_t r); -void -gmp_die (const char *msg); - - -mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n); -void -mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b); -void -mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b); -void -mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b); -int -mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un); -unsigned long int -mpz_get_ui (const mpz_t u); -int -mpz_cmpabs_ui (const mpz_t u, unsigned long v); -mp_limb_t -mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); -mp_limb_t -mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); -mp_limb_t -mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_limb_t -mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0); -int -mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode); -void -mpz_mod (mpz_t r, const mpz_t n, const mpz_t d); -void -mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d); - -void -mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0); - -void -mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn); -int -mpz_cmp_ui (const mpz_t u, unsigned long v); -int -mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n); -mp_limb_t -mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt); -mp_limb_t -mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, 
unsigned int cnt); -int -mpz_invert (mpz_t r, const mpz_t u, const mpz_t m); -mp_limb_t -mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); -mp_limb_t -mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n); -void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv); -mp_limb_t -mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -void -mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv); -void -mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv); -void -mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m); -int -mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_size_t -mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b); -mp_limb_t -mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b); -mp_limb_t -mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn); -mp_size_t -mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b); -void -mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); -mp_limb_t -mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -mp_limb_t -mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl); -mp_limb_t -mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn); -void -mpz_mul (mpz_t r, const mpz_t u, const mpz_t v); -void -mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n); -void -mpn_zero (mp_ptr rp, mp_size_t n); -void -mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits); -int -mpn_zero_p(mp_srcptr rp, mp_size_t n); -void -mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode); -void -mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt); -int -mpz_cmp (const mpz_t a, const mpz_t b); -void -mpz_add (mpz_t r, const mpz_t a, const mpz_t b); -int -mpz_tstbit (const mpz_t 
d, mp_bitcnt_t bit_index); -mp_bitcnt_t -mpn_limb_size_in_base_2 (mp_limb_t u); -size_t -mpz_sizeinbase (const mpz_t u, int base); -int -mpz_sgn (const mpz_t u); -mp_bitcnt_t -mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux); -mp_bitcnt_t -mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit); -mp_bitcnt_t -mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit); -mp_bitcnt_t -mpz_make_odd (mpz_t r); -void -mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d); -void -mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_setbit (mpz_t d, mp_bitcnt_t bit_index); -void -mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d); -int -mpz_cmpabs (const mpz_t u, const mpz_t v); -void -mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v); -void -mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v); - -unsigned -mpn_base_power_of_two_p (unsigned b); -void -mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b); -int isspace_gpu(unsigned char c); -int strlen_c(__global char *c); -mp_size_t mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits); -mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info); -int -mpz_set_str (mpz_t r, __global char *sp, int base); -int -mpz_init_set_str (mpz_t r, __global char *sp, int base); - -//void mpz_sub (mpz_t r, const mpz_t a, const mpz_t b); -////void mpz_add (mpz_t, const mpz_t, const mpz_t); - -void mpz_abs (mpz_t, const mpz_t); - -void mpz_neg (mpz_t, const mpz_t); -void mpz_swap (mpz_t, mpz_t); -//void mpz_mod (mpz_t, const mpz_t, const mpz_t); -// -////int mpz_sgn (const mpz_t); -// -////void mpz_mul (mpz_t, const mpz_t, const mpz_t); -//void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t); -// -//void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t); -////void mpz_powm (mpz_t, const mpz_t, const mpz_t, const 
mpz_t); -// -void mpz_addmul (mpz_t, const mpz_t, const mpz_t); -// -//int mpz_tstbit (const mpz_t, mp_bitcnt_t); -// -//int mpz_cmp_ui (const mpz_t u, unsigned long v); -// -void mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn); -// -//mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t); - -void -mpz_set_lg (unsigned long *r, __global unsigned long *x); - -#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0) - -#define MPZ_REALLOC(z,n) (z)->_mp_d - -void -mpz_init (mpz_t r) -{ - const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0; - - r->_mp_alloc = 0; - r->_mp_size = 0; - - //memset(r->_mp_d, 0, 256); - - for (int i = 0; i < 256; i++) { - r->_mp_d[i] = 0; - } - - // r->_mp_d = (mp_ptr) &dummy_limb; -} - -void -mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - mp_size_t i; - for (i = 0; i < n; i++) - d[i] = s[i]; -} - -void -mpz_set (mpz_t r, const mpz_t x) -{ - /* Allow the NOP r == x */ - if (r != x) - { - mp_size_t n; - mp_ptr rp; - - n = GMP_ABS (x->_mp_size); - rp = MPZ_REALLOC (r, n); - - mpn_copyi (rp, x->_mp_d, n); - r->_mp_size = x->_mp_size; - } -} - -void -mpz_set_lg (unsigned long *r, __global unsigned long *x) -{ - - - - - // event_t wait; - - //wait = async_work_group_strided_copy(r,x, 256 + 2, 4,0); - - - - //wait_group_events(0,&wait); - - r[0] = x[0]; - r[1] = x[1]; - - for (int i = 2; i < 256; i++) { - - r[i] = x[i]; - - } - - //printf((__constant char *)"%i\n",r->_mp_size); - - // memcpy(r->_mp_d,(*(mpz_t *)x)->_mp_d,256); - -} - - -void -mpz_set_ui (mpz_t r, unsigned long int x) -{ - if (x > 0) - { - r->_mp_size = 1; - MPZ_REALLOC (r, 1)[0] = x; - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - while (x >>= LOCAL_GMP_LIMB_BITS) - { - ++ r->_mp_size; - MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x; - } - } - } - else - r->_mp_size = 0; -} - - -void -mpz_neg (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = -r->_mp_size; -} - - -void -mpz_set_si (mpz_t r, signed long int 
x) -{ - if (x >= 0) - mpz_set_ui (r, x); - else /* (x < 0) */ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x)); - mpz_neg (r, r); - } - else - { - r->_mp_size = -1; - MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x); - } -} - -void -mpz_init_set_si (mpz_t r, signed long int x) -{ - mpz_init (r); - mpz_set_si (r, x); -} - - -void -mpz_init_set (mpz_t r, const mpz_t x) -{ - mpz_init (r); - mpz_set (r, x); -} - -void -mpz_init2 (mpz_t r, mp_bitcnt_t bits) -{ - mp_size_t rn; - - bits -= (bits != 0); /* Round down, except if 0 */ - rn = 1 + bits / GMP_LIMB_BITS; - - r->_mp_alloc = rn; - r->_mp_size = 0; - // r->_mp_d = gmp_alloc_limbs (rn); -} - -void -mpz_init_set_ui (mpz_t r, unsigned long int x) -{ - mpz_init (r); - mpz_set_ui (r, x); -} - -void -mpz_clear (mpz_t r) -{ - //if (r->_mp_alloc) - //gmp_free_limbs (r->_mp_d, r->_mp_alloc); -} - - -void -gmp_die (const char *msg) -{ - //fprintf (stderr, "%s\n", msg); - //abort(); -} - -mp_size_t mpn_normalized_size (mp_srcptr xp, mp_size_t n) -{ - while (n > 0 && xp[n-1] == 0) - --n; - return n; -} - -void -mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_t bb; - mpz_init_set_ui (bb, b); - mpz_add (r, a, bb); - mpz_clear (bb); -} - -void -mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b) -{ - mpz_neg (r, b); - mpz_add_ui (r, r, a); -} - - -void -mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b) -{ - mpz_ui_sub (r, b, a); - mpz_neg (r, r); -} - -int -mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un) -{ - int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS; - mp_limb_t ulongrem = 0; - - if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0) - ulongrem = (mp_limb_t) (ULONG_MAX_gpu >> GMP_LIMB_BITS * ulongsize) + 1; - - return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1); -} - -unsigned long int -mpz_get_ui (const mpz_t u) -{ - if (GMP_LIMB_BITS < GMP_ULONG_BITS) - { - int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; - unsigned long r = 0; - mp_size_t n = 
GMP_ABS (u->_mp_size); - n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS); - while (--n >= 0) - r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n]; - return r; - } - - return u->_mp_size == 0 ? 0 : u->_mp_d[0]; -} - -int -mpz_cmpabs_ui (const mpz_t u, unsigned long v) -{ - mp_size_t un = GMP_ABS (u->_mp_size); - - if (! mpn_absfits_ulong_p (u->_mp_d, un)) - return 1; - else - { - unsigned long uu = mpz_get_ui (u); - return GMP_CMP(uu, v); - } -} - -mp_limb_t -mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - - i = 0; - do - { - mp_limb_t a = ap[i]; - /* Carry out */ - mp_limb_t cy = a < b; - rp[i] = a - b; - b = cy; - } - while (++i < n); - - return b; -} - -mp_limb_t -mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b; - a = ap[i]; b = bp[i]; - b += cy; - cy = (b < cy); - cy += (a < b); - rp[i] = a - b; - } - return cy; -} - -mp_limb_t -mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_sub_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - - -mp_limb_t -mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0) -{ - mp_limb_t r, m; - - { - mp_limb_t p, ql; - unsigned ul, uh, qh; - - assert (sizeof (unsigned) * 2 >= sizeof (mp_limb_t)); - /* For notation, let b denote the half-limb base, so that B = b^2. - Split u1 = b uh + ul. */ - ul = u1 & GMP_LLIMB_MASK; - uh = u1 >> (GMP_LIMB_BITS / 2); - - /* Approximation of the high half of quotient. Differs from the 2/1 - inverse of the half limb uh, since we have already subtracted - u0. 
*/ - qh = (u1 ^ GMP_LIMB_MAX) / uh; - - /* Adjust to get a half-limb 3/2 inverse, i.e., we want - - qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u - = floor( (b (~u) + b-1) / u), - - and the remainder - - r = b (~u) + b-1 - qh (b uh + ul) - = b (~u - qh uh) + b-1 - qh ul - - Subtraction of qh ul may underflow, which implies adjustments. - But by normalization, 2 u >= B > qh ul, so we need to adjust by - at most 2. - */ - - r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK; - - p = (mp_limb_t) qh * ul; - /* Adjustment steps taken from udiv_qrnnd_c */ - if (r < p) - { - qh--; - r += u1; - if (r >= u1) /* i.e. we didn't get carry when adding to r */ - if (r < p) - { - qh--; - r += u1; - } - } - r -= p; - - /* Low half of the quotient is - - ql = floor ( (b r + b-1) / u1). - - This is a 3/2 division (on half-limbs), for which qh is a - suitable inverse. */ - - p = (r >> (GMP_LIMB_BITS / 2)) * qh + r; - /* Unlike full-limb 3/2, we can add 1 without overflow. For this to - work, it is essential that ql is a full mp_limb_t. */ - ql = (p >> (GMP_LIMB_BITS / 2)) + 1; - - /* By the 3/2 trick, we don't need the high half limb. */ - r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1; - - if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2)))) - { - ql--; - r += u1; - } - m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; - if (r >= u1) - { - m++; - r -= u1; - } - } - - /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a - 3/2 inverse. 
*/ - if (u0 > 0) - { - mp_limb_t th, tl; - r = ~r; - r += u0; - if (r < u0) - { - m--; - if (r >= u1) - { - m--; - r -= u1; - } - r -= u1; - } - gmp_umul_ppmm (th, tl, u0, m); - r += th; - if (r < th) - { - m--; - m -= ((r > u1) | ((r == u1) & (tl > u0))); - } - } - - return m; -} - -int -mpz_div_qr (mpz_t q, mpz_t r, - const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode) -{ - mp_size_t ns, ds, nn, dn, qs; - ns = n->_mp_size; - ds = d->_mp_size; - - if (ds == 0) { - - } - //gmp_die("mpz_div_qr: Divide by zero."); - - if (ns == 0) - { - if (q) - q->_mp_size = 0; - if (r) - r->_mp_size = 0; - return 0; - } - - nn = GMP_ABS (ns); - dn = GMP_ABS (ds); - - qs = ds ^ ns; - - if (nn < dn) - { - if (mode == GMP_DIV_CEIL && qs >= 0) - { - /* q = 1, r = n - d */ - if (r) - mpz_sub (r, n, d); - if (q) - mpz_set_ui (q, 1); - } - else if (mode == GMP_DIV_FLOOR && qs < 0) - { - /* q = -1, r = n + d */ - if (r) - mpz_add (r, n, d); - if (q) - mpz_set_si (q, -1); - } - else - { - /* q = 0, r = d */ - if (r) - mpz_set (r, n); - if (q) - q->_mp_size = 0; - } - return 1; - } - else - { - mp_ptr np, qp; - mp_size_t qn, rn; - mpz_t tq, tr; - - mpz_init_set (tr, n); - np = tr->_mp_d; - - qn = nn - dn + 1; - - if (q) - { - mpz_init2 (tq, qn * GMP_LIMB_BITS); - qp = tq->_mp_d; - } - else - qp = NULL; - - mpn_div_qr (qp, np, nn, d->_mp_d, dn); - - if (qp) - { - qn -= (qp[qn-1] == 0); - - tq->_mp_size = qs < 0 ? -qn : qn; - } - rn = mpn_normalized_size (np, dn); - tr->_mp_size = ns < 0 ? 
- rn : rn; - - if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0) - { - if (q) - mpz_sub_ui (tq, tq, 1); - if (r) - mpz_add (tr, tr, d); - } - else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0) - { - if (q) - mpz_add_ui (tq, tq, 1); - if (r) - mpz_sub (tr, tr, d); - } - - if (q) - { - mpz_swap (tq, q); - mpz_clear (tq); - } - if (r) - mpz_swap (tr, r); - - mpz_clear (tr); - - return rn != 0; - } -} - -void -mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) -{ - struct gmp_div_inverse inv; - // mp_ptr tp = NULL; - - mpz_t tp; - - - - assert (dn > 0); - assert (nn >= dn); - - mpn_div_qr_invert (&inv, dp, dn); - if (dn > 2 && inv.shift > 0) - { - //tp = gmp_alloc_limbs (dn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, dp, dn, inv.shift)); - dp = tp->_mp_d; - } - mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv); - if (tp) {} - //gmp_free_limbs (tp, dn); -} - -void -mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v) -{ - mpz_t t; - mpz_init (t); - mpz_mul (t, u, v); - mpz_add (r, r, t); - mpz_clear (t); -} - -void -mpz_swap (mpz_t u, mpz_t v) -{ - //MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc); - //MPN_PTR_SWAP (u->_mp_d, u->_mp_size, v->_mp_d, v->_mp_size); - - mpz_t temp; - mpz_init(temp); - - *temp = *u; - *u = *v; - *v = *temp; - -} - -void -mpz_mod (mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? 
GMP_DIV_FLOOR : GMP_DIV_CEIL); -} - -void -mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d) -{ - unsigned shift; - - assert (d > 0); - gmp_clz (shift, d); - inv->shift = shift; - inv->d1 = d << shift; - inv->di = mpn_invert_limb (inv->d1); -} - -void -mpn_div_qr_2_invert (struct gmp_div_inverse *inv, - mp_limb_t d1, mp_limb_t d0) -{ - unsigned shift; - - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 <<= shift; - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); -} - -void -mpn_div_qr_invert (struct gmp_div_inverse *inv, - mp_srcptr dp, mp_size_t dn) -{ - assert (dn > 0); - - if (dn == 1) - mpn_div_qr_1_invert (inv, dp[0]); - else if (dn == 2) - mpn_div_qr_2_invert (inv, dp[1], dp[0]); - else - { - unsigned shift; - mp_limb_t d1, d0; - - d1 = dp[dn-1]; - d0 = dp[dn-2]; - assert (d1 > 0); - gmp_clz (shift, d1); - inv->shift = shift; - if (shift > 0) - { - d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); - d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift)); - } - inv->d1 = d1; - inv->d0 = d0; - inv->di = mpn_invert_3by2 (d1, d0); - } -} - - -int -mpz_cmp_ui (const mpz_t u, unsigned long v) -{ - mp_size_t usize = u->_mp_size; - - if (usize < 0) - return -1; - else - return mpz_cmpabs_ui (u, v); -} - -int -mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - while (--n >= 0) - { - if (ap[n] != bp[n]) - return ap[n] > bp[n] ? 
1 : -1; - } - return 0; -} - -mp_limb_t -mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - up += n; - rp += n; - - tnc = GMP_LIMB_BITS - cnt; - low_limb = *--up; - retval = low_limb >> tnc; - high_limb = (low_limb << cnt); - - while (--n != 0) - { - low_limb = *--up; - *--rp = high_limb | (low_limb >> tnc); - high_limb = (low_limb << cnt); - } - *--rp = high_limb; - - return retval; -} - -mp_limb_t -mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) -{ - mp_limb_t high_limb, low_limb; - unsigned int tnc; - mp_limb_t retval; - - assert (n >= 1); - assert (cnt >= 1); - assert (cnt < GMP_LIMB_BITS); - - tnc = GMP_LIMB_BITS - cnt; - high_limb = *up++; - retval = (high_limb << tnc); - low_limb = high_limb >> cnt; - - while (--n != 0) - { - high_limb = *up++; - *rp++ = low_limb | (high_limb << tnc); - low_limb = high_limb >> cnt; - } - *rp = low_limb; - - return retval; -} - -int -mpz_invert (mpz_t r, const mpz_t u, const mpz_t m) -{ - mpz_t g, tr; - int invertible; - - if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0) - return 0; - - mpz_init (g); - mpz_init (tr); - - mpz_gcdext (g, tr, NULL, u, m); - invertible = (mpz_cmp_ui (g, 1) == 0); - - if (invertible) - { - if (tr->_mp_size < 0) - { - if (m->_mp_size >= 0) - mpz_add (tr, tr, m); - else - mpz_sub (tr, tr, m); - } - mpz_swap (r, tr); - } - - mpz_clear (g); - mpz_clear (tr); - return invertible; -} - -/* Not matching current public gmp interface, rather corresponding to - the sbpi1_div_* functions. */ -mp_limb_t -mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - mp_limb_t d, di; - mp_limb_t r; - mp_ptr tp = NULL; - mp_size_t tn = 0; - - if (inv->shift > 0) - { - /* Shift, reusing qp area if possible. In-place shift if qp == np. 
*/ - tp = qp; - if (!tp) - { - tn = nn; - - // tp = gmp_alloc_limbs (tn); - } - r = mpn_lshift (tp, np, nn, inv->shift); - np = tp; - } - else - r = 0; - - d = inv->d1; - di = inv->di; - while (--nn >= 0) - { - mp_limb_t q; - - gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di); - if (qp) - qp[nn] = q; - } - //if (tn) - //gmp_free_limbs (tp, tn); - - return r >> inv->shift; -} - -mp_limb_t -mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n) -{ - mp_size_t i; - mp_limb_t cy; - - for (i = 0, cy = 0; i < n; i++) - { - mp_limb_t a, b, r; - a = ap[i]; b = bp[i]; - r = a + cy; - cy = (r < cy); - r += b; - cy += (r < b); - rp[i] = r; - } - return cy; -} - -void -mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - const struct gmp_div_inverse *inv) -{ - unsigned shift; - mp_size_t i; - mp_limb_t d1, d0, di, r1, r0; - - assert (nn >= 2); - shift = inv->shift; - d1 = inv->d1; - d0 = inv->d0; - di = inv->di; - - if (shift > 0) - r1 = mpn_lshift (np, np, nn, shift); - else - r1 = 0; - - r0 = np[nn - 1]; - - i = nn - 2; - do - { - mp_limb_t n0, q; - n0 = np[i]; - gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di); - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - if (shift > 0) - { - assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0); - r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift)); - r1 >>= shift; - } - - np[1] = r1; - np[0] = r0; -} - -mp_limb_t -mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl - lpl; - cl += lpl > rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -void -mpn_div_qr_pi1 (mp_ptr qp, - mp_ptr np, mp_size_t nn, mp_limb_t n1, - mp_srcptr dp, mp_size_t dn, - mp_limb_t dinv) -{ - mp_size_t i; - - mp_limb_t d1, d0; - mp_limb_t cy, cy1; - mp_limb_t q; - - assert (dn > 2); - assert (nn >= dn); - - d1 = dp[dn - 
1]; - d0 = dp[dn - 2]; - - assert ((d1 & GMP_LIMB_HIGHBIT) != 0); - /* Iteration variable is the index of the q limb. - * - * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]> - * by <d1, d0, dp[dn-3], ..., dp[0] > - */ - - i = nn - dn; - do - { - mp_limb_t n0 = np[dn-1+i]; - - if (n1 == d1 && n0 == d0) - { - q = GMP_LIMB_MAX; - mpn_submul_1 (np+i, dp, dn, q); - n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */ - } - else - { - gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv); - - cy = mpn_submul_1 (np + i, dp, dn-2, q); - - cy1 = n0 < cy; - n0 = n0 - cy; - cy = n1 < cy1; - n1 = n1 - cy1; - np[dn-2+i] = n0; - - if (cy != 0) - { - n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1); - q--; - } - } - - if (qp) - qp[i] = q; - } - while (--i >= 0); - - np[dn - 1] = n1; -} - -void -mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn, - mp_srcptr dp, mp_size_t dn, - const struct gmp_div_inverse *inv) -{ - assert (dn > 0); - assert (nn >= dn); - - if (dn == 1) - np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv); - else if (dn == 2) - mpn_div_qr_2_preinv (qp, np, nn, inv); - else - { - mp_limb_t nh; - unsigned shift; - - assert (inv->d1 == dp[dn-1]); - assert (inv->d0 == dp[dn-2]); - assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0); - - shift = inv->shift; - if (shift > 0) - nh = mpn_lshift (np, np, nn, shift); - else - nh = 0; - - mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di); - - if (shift > 0) - gmp_assert_nocarry (mpn_rshift (np, np, dn, shift)); - } -} - -void -mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m) -{ - mpz_t tr; - mpz_t base; - mp_size_t en, mn; - mp_srcptr mp; - struct gmp_div_inverse minv; - unsigned shift; - //mp_ptr tp = NULL; - mpz_t tp; - - //mpz_init(tp); - - en = GMP_ABS (e->_mp_size); - mn = GMP_ABS (m->_mp_size); - if (mn == 0) {} - //gmp_die ("mpz_powm: Zero modulo."); - - if (en == 0) - { - mpz_set_ui (r, mpz_cmpabs_ui (m, 1)); - return; - } - - mp = m->_mp_d; - mpn_div_qr_invert (&minv, 
mp, mn); - shift = minv.shift; - - if (shift > 0) - { - /* To avoid shifts, we do all our reductions, except the final - one, using a *normalized* m. */ - minv.shift = 0; - - // tp = gmp_alloc_limbs (mn); - gmp_assert_nocarry (mpn_lshift (tp->_mp_d, mp, mn, shift)); - mp = tp->_mp_d; - } - - mpz_init (base); - - if (e->_mp_size < 0) - { - if (!mpz_invert (base, b, m)) {} - //gmp_die ("mpz_powm: Negative exponent and non-invertible base."); - } - else - { - mp_size_t bn; - mpz_abs (base, b); - - bn = base->_mp_size; - if (bn >= mn) - { - mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv); - bn = mn; - } - - /* We have reduced the absolute value. Now take care of the - sign. Note that we get zero represented non-canonically as - m. */ - if (b->_mp_size < 0) - { - mp_ptr bp = MPZ_REALLOC (base, mn); - gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn)); - bn = mn; - } - base->_mp_size = mpn_normalized_size (base->_mp_d, bn); - } - mpz_init_set_ui (tr, 1); - - while (--en >= 0) - { - mp_limb_t w = e->_mp_d[en]; - mp_limb_t bit; - - bit = GMP_LIMB_HIGHBIT; - do - { - mpz_mul (tr, tr, tr); - if (w & bit) - mpz_mul (tr, tr, base); - if (tr->_mp_size > mn) - { - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - bit >>= 1; - } - while (bit > 0); - } - - /* Final reduction */ - if (tr->_mp_size >= mn) - { - minv.shift = shift; - mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv); - tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn); - } - //if (tp) - //gmp_free_limbs (tp, mn); - - mpz_swap (r, tr); - mpz_clear (tr); - mpz_clear (base); -} - -int -mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - if (an != bn) - return an < bn ? 
-1 : 1; - else - return mpn_cmp (ap, bp, an); -} - - -mp_size_t -mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - int cmp; - mp_ptr rp; - - cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn); - if (cmp > 0) - { - rp = MPZ_REALLOC (r, an); - gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn)); - return mpn_normalized_size (rp, an); - } - else if (cmp < 0) - { - rp = MPZ_REALLOC (r, bn); - gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an)); - return -mpn_normalized_size (rp, bn); - } - else - return 0; -} - -mp_limb_t -mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b) -{ - mp_size_t i; - - assert (n > 0); - i = 0; - do - { - mp_limb_t r = ap[i] + b; - /* Carry out */ - b = (r < b); - rp[i] = r; - } - while (++i < n); - - return b; -} - - -mp_limb_t -mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) -{ - mp_limb_t cy; - - assert (an >= bn); - - cy = mpn_add_n (rp, ap, bp, bn); - if (an > bn) - cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy); - return cy; -} - -mp_size_t -mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t an = GMP_ABS (a->_mp_size); - mp_size_t bn = GMP_ABS (b->_mp_size); - mp_ptr rp; - mp_limb_t cy; - - if (an < bn) - { - MPZ_SRCPTR_SWAP (a, b); - MP_SIZE_T_SWAP (an, bn); - } - - rp = MPZ_REALLOC (r, an + 1); - cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn); - - rp[an] = cy; - - return an + cy; -} - -void -mpz_sub (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_sub (r, a, b); - else - rn = mpz_abs_add (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? 
rn : - rn; -} - -mp_limb_t -mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl, rl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - rl = *rp; - lpl = rl + lpl; - cl += lpl < rl; - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - -mp_limb_t -mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) -{ - mp_limb_t ul, cl, hpl, lpl; - - assert (n >= 1); - - cl = 0; - do - { - ul = *up++; - gmp_umul_ppmm (hpl, lpl, ul, vl); - - lpl += cl; - cl = (lpl < cl) + hpl; - - *rp++ = lpl; - } - while (--n != 0); - - return cl; -} - - -mp_limb_t -mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) -{ - assert (un >= vn); - assert (vn >= 1); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un)); - assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn)); - - /* We first multiply by the low order limb. This result can be - stored, not added, to rp. We also avoid a loop for zeroing this - way. */ - - rp[un] = mpn_mul_1 (rp, up, un, vp[0]); - - /* Now accumulate the product of up[] and the next higher limb from - vp[]. */ - - while (--vn >= 1) - { - rp += 1, vp += 1; - rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); - } - return rp[un]; -} - - -void -mpz_mul (mpz_t r, const mpz_t u, const mpz_t v) -{ - int sign; - mp_size_t un, vn, rn; - mpz_t t; - mp_ptr tp; - - un = u->_mp_size; - vn = v->_mp_size; - - if (un == 0 || vn == 0) - { - r->_mp_size = 0; - return; - } - - sign = (un ^ vn) < 0; - - un = GMP_ABS (un); - vn = GMP_ABS (vn); - - mpz_init2 (t, (un + vn) * GMP_LIMB_BITS); - - tp = t->_mp_d; - if (un >= vn) - mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn); - else - mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un); - - rn = un + vn; - rn -= tp[rn-1] == 0; - - t->_mp_size = sign ? 
- rn : rn; - mpz_swap (r, t); - mpz_clear (t); -} - -void -mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n) -{ - while (--n >= 0) - d[n] = s[n]; -} - -void -mpn_zero (mp_ptr rp, mp_size_t n) -{ - while (--n >= 0) - rp[n] = 0; -} - - -void -mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits) -{ - mp_size_t un, rn; - mp_size_t limbs; - unsigned shift; - mp_ptr rp; - - un = GMP_ABS (u->_mp_size); - if (un == 0) - { - r->_mp_size = 0; - return; - } - - limbs = bits / GMP_LIMB_BITS; - shift = bits % GMP_LIMB_BITS; - - rn = un + limbs + (shift > 0); - rp = MPZ_REALLOC (r, rn); - if (shift > 0) - { - mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift); - rp[rn-1] = cy; - rn -= (cy == 0); - } - else - mpn_copyd (rp + limbs, u->_mp_d, un); - - mpn_zero (rp, limbs); - - r->_mp_size = (u->_mp_size < 0) ? - rn : rn; -} - -int -mpn_zero_p(mp_srcptr rp, mp_size_t n) -{ - return mpn_normalized_size (rp, n) == 0; -} - - -void -mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index, - enum mpz_div_round_mode mode) -{ - mp_size_t un, qn; - mp_size_t limb_cnt; - mp_ptr qp; - int adjust; - - un = u->_mp_size; - if (un == 0) - { - q->_mp_size = 0; - return; - } - limb_cnt = bit_index / GMP_LIMB_BITS; - qn = GMP_ABS (un) - limb_cnt; - bit_index %= GMP_LIMB_BITS; - - if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */ - /* Note: Below, the final indexing at limb_cnt is valid because at - that point we have qn > 0. 
*/ - adjust = (qn <= 0 - || !mpn_zero_p (u->_mp_d, limb_cnt) - || (u->_mp_d[limb_cnt] - & (((mp_limb_t) 1 << bit_index) - 1))); - else - adjust = 0; - - if (qn <= 0) - qn = 0; - else - { - qp = MPZ_REALLOC (q, qn); - - if (bit_index != 0) - { - mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index); - qn -= qp[qn - 1] == 0; - } - else - { - mpn_copyi (qp, u->_mp_d + limb_cnt, qn); - } - } - - q->_mp_size = qn; - - if (adjust) - mpz_add_ui (q, q, 1); - if (un < 0) - mpz_neg (q, q); -} - -void -mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt) -{ - mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC); -} - -int -mpz_cmp (const mpz_t a, const mpz_t b) -{ - mp_size_t asize = a->_mp_size; - mp_size_t bsize = b->_mp_size; - - if (asize != bsize) - return (asize < bsize) ? -1 : 1; - else if (asize >= 0) - return mpn_cmp (a->_mp_d, b->_mp_d, asize); - else - return mpn_cmp (b->_mp_d, a->_mp_d, -asize); -} - -void -mpz_add (mpz_t r, const mpz_t a, const mpz_t b) -{ - mp_size_t rn; - - if ( (a->_mp_size ^ b->_mp_size) >= 0) - rn = mpz_abs_add (r, a, b); - else - rn = mpz_abs_sub (r, a, b); - - r->_mp_size = a->_mp_size >= 0 ? rn : - rn; -} - - -int -mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t limb_index; - unsigned shift; - mp_size_t ds; - mp_size_t dn; - mp_limb_t w; - int bit; - - ds = d->_mp_size; - dn = GMP_ABS (ds); - limb_index = bit_index / GMP_LIMB_BITS; - if (limb_index >= dn) - return ds < 0; - - shift = bit_index % GMP_LIMB_BITS; - w = d->_mp_d[limb_index]; - bit = (w >> shift) & 1; - - if (ds < 0) - { - /* d < 0. Check if any of the bits below is set: If so, our bit - must be complemented. 
*/ - if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0) - return bit ^ 1; - while (--limb_index >= 0) - if (d->_mp_d[limb_index] > 0) - return bit ^ 1; - } - return bit; -} - -mp_bitcnt_t -mpn_limb_size_in_base_2 (mp_limb_t u) -{ - unsigned shift; - - assert (u > 0); - gmp_clz (shift, u); - return GMP_LIMB_BITS - shift; -} - -size_t -mpz_sizeinbase (const mpz_t u, int base) -{ - mp_size_t un, tn; - mp_srcptr up; - //mp_ptr tp; - mpz_t tp; - - mp_bitcnt_t bits; - struct gmp_div_inverse bi; - size_t ndigits; - - mpz_init(tp); - - assert (base >= 2); -assert (base <= 62); - - un = GMP_ABS (u->_mp_size); - if (un == 0) - return 1; - - up = u->_mp_d; - - bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]); - switch (base) - { - case 2: - return bits; - case 4: - return (bits + 1) / 2; - case 8: - return (bits + 2) / 3; - case 16: - return (bits + 3) / 4; - case 32: - return (bits + 4) / 5; - /* FIXME: Do something more clever for the common case of base - 10. */ - } - - //tp = gmp_alloc_limbs (un); - - mpn_copyi (tp->_mp_d, up, un); - mpn_div_qr_1_invert (&bi, base); - - tn = un; - ndigits = 0; - do - { - ndigits++; - mpn_div_qr_1_preinv (tp->_mp_d, tp->_mp_d, tn, &bi); - tn -= (tp->_mp_d[tn-1] == 0); - } - while (tn > 0); - - //gmp_free_limbs (tp, un); - return ndigits; -} - -int -mpz_sgn (const mpz_t u) -{ - return GMP_CMP (u->_mp_size, 0); -} - -mp_bitcnt_t -mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un, - mp_limb_t ux) -{ - unsigned cnt; - - assert (ux == 0 || ux == GMP_LIMB_MAX); - assert (0 <= i && i <= un ); - - while (limb == 0) - { - i++; - if (i == un) - return (ux == 0 ? 
~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS); - limb = ux ^ up[i]; - } - gmp_ctz (cnt, limb); - return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt; -} - -void -mpz_abs (mpz_t r, const mpz_t u) -{ - mpz_set (r, u); - r->_mp_size = GMP_ABS (r->_mp_size); -} - - -mp_bitcnt_t -mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit) -{ - mp_size_t i; - i = bit / GMP_LIMB_BITS; - - return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)), - i, ptr, i, 0); -} - -mp_bitcnt_t -mpz_scan1 (mpz_t u, mp_bitcnt_t starting_bit) -{ - mp_ptr up; - mp_size_t us, un, i; - mp_limb_t limb, ux; - - us = u->_mp_size; - un = GMP_ABS (us); - i = starting_bit / GMP_LIMB_BITS; - - /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit - for u<0. Notice this test picks up any u==0 too. */ - if (i >= un) - return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit); - - up = u->_mp_d; - ux = 0; - limb = up[i]; - - if (starting_bit != 0) - { - if (us < 0) - { - ux = mpn_zero_p (up, i); - limb = ~ limb + ux; - ux = - (mp_limb_t) (limb >= ux); - } - - /* Mask to 0 all bits before starting_bit, thus ignoring them. */ - limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS); - } - - return mpn_common_scan (limb, i, up, un, ux); -} - - -mp_bitcnt_t -mpz_make_odd (mpz_t r) -{ - mp_bitcnt_t shift; - - assert (r->_mp_size > 0); - /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */ - shift = mpn_scan1 (r->_mp_d, 0); - mpz_tdiv_q_2exp (r, r, shift); - - return shift; -} - -void -mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d) -{ - mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC); -} - -void -mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_limb_t bit; - mp_ptr dp; - - dn = GMP_ABS (d->_mp_size); - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - if (limb_index >= dn) - { - mp_size_t i; - /* The bit should be set outside of the end of the number. 
- We have to increase the size of the number. */ - dp = MPZ_REALLOC (d, limb_index + 1); - - dp[limb_index] = bit; - for (i = dn; i < limb_index; i++) - dp[i] = 0; - dn = limb_index + 1; - } - else - { - mp_limb_t cy; - - dp = d->_mp_d; - - cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit); - if (cy > 0) - { - dp = MPZ_REALLOC (d, dn + 1); - dp[dn++] = cy; - } - } - - d->_mp_size = (d->_mp_size < 0) ? - dn : dn; -} - -void -mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index) -{ - mp_size_t dn, limb_index; - mp_ptr dp; - mp_limb_t bit; - - dn = GMP_ABS (d->_mp_size); - dp = d->_mp_d; - - limb_index = bit_index / GMP_LIMB_BITS; - bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS); - - assert (limb_index < dn); - - gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index, - dn - limb_index, bit)); - dn = mpn_normalized_size (dp, dn); - d->_mp_size = (d->_mp_size < 0) ? - dn : dn; -} - -void -mpz_setbit (mpz_t d, mp_bitcnt_t bit_index) -{ - if (!mpz_tstbit (d, bit_index)) - { - if (d->_mp_size >= 0) - mpz_abs_add_bit (d, bit_index); - else - mpz_abs_sub_bit (d, bit_index); - } -} - -void -mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d) -{ - gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC)); -} - -#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0]) -#define mpz_even_p(z) (! 
mpz_odd_p (z)) - -int -mpz_cmpabs (const mpz_t u, const mpz_t v) -{ - return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size), - v->_mp_d, GMP_ABS (v->_mp_size)); -} - -void -mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v) -{ - mpz_t tu, tv, s0, s1, t0, t1; - mp_bitcnt_t uz, vz, gz; - mp_bitcnt_t power; - - if (u->_mp_size == 0) - { - /* g = 0 u + sgn(v) v */ - signed long sign = mpz_sgn (v); - mpz_abs (g, v); - if (s) - s->_mp_size = 0; - if (t) - mpz_set_si (t, sign); - return; - } - - if (v->_mp_size == 0) - { - /* g = sgn(u) u + 0 v */ - signed long sign = mpz_sgn (u); - mpz_abs (g, u); - if (s) - mpz_set_si (s, sign); - if (t) - t->_mp_size = 0; - return; - } - - mpz_init (tu); - mpz_init (tv); - mpz_init (s0); - mpz_init (s1); - mpz_init (t0); - mpz_init (t1); - - mpz_abs (tu, u); - uz = mpz_make_odd (tu); - mpz_abs (tv, v); - vz = mpz_make_odd (tv); -gz = GMP_MIN (uz, vz); - - uz -= gz; - vz -= gz; - - /* Cofactors corresponding to odd gcd. gz handled later. */ - if (tu->_mp_size < tv->_mp_size) - { - mpz_swap (tu, tv); - MPZ_SRCPTR_SWAP (u, v); - MPZ_PTR_SWAP (s, t); - MP_BITCNT_T_SWAP (uz, vz); - } - - /* Maintain - * - * u = t0 tu + t1 tv - * v = s0 tu + s1 tv - * - * where u and v denote the inputs with common factors of two - * eliminated, and det (s0, t0; s1, t1) = 2^p. 
Then - * - * 2^p tu = s1 u - t1 v - * 2^p tv = -s0 u + t0 v - */ - - /* After initial division, tu = q tv + tu', we have - * - * u = 2^uz (tu' + q tv) - * v = 2^vz tv - * - * or - * - * t0 = 2^uz, t1 = 2^uz q - * s0 = 0, s1 = 2^vz - */ - - mpz_tdiv_qr (t1, tu, tu, tv); - mpz_mul_2exp (t1, t1, uz); - - mpz_setbit (s1, vz); - power = uz + vz; - - if (tu->_mp_size > 0) - { - mp_bitcnt_t shift; - shift = mpz_make_odd (tu); - mpz_setbit (t0, uz + shift); - power += shift; - - for (;;) - { - int c; - c = mpz_cmp (tu, tv); - if (c == 0) - break; - - if (c < 0) - { - /* tv = tv' + tu - * - * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv' - * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */ - - mpz_sub (tv, tv, tu); - mpz_add (t0, t0, t1); - mpz_add (s0, s0, s1); - - shift = mpz_make_odd (tv); - mpz_mul_2exp (t1, t1, shift); - mpz_mul_2exp (s1, s1, shift); - } - else - { - mpz_sub (tu, tu, tv); - mpz_add (t1, t0, t1); - mpz_add (s1, s0, s1); - - shift = mpz_make_odd (tu); - mpz_mul_2exp (t0, t0, shift); - mpz_mul_2exp (s0, s0, shift); - } - power += shift; - } - } - else - mpz_setbit (t0, uz); - - /* Now tv = odd part of gcd, and -s0 and t0 are corresponding - cofactors. */ - - mpz_mul_2exp (tv, tv, gz); - mpz_neg (s0, s0); - - /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. 
To - adjust cofactors, we need u / g and v / g */ - - mpz_divexact (s1, v, tv); - mpz_abs (s1, s1); - mpz_divexact (t1, u, tv); - mpz_abs (t1, t1); - - while (power-- > 0) - { - /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */ - if (mpz_odd_p (s0) || mpz_odd_p (t0)) - { - mpz_sub (s0, s0, s1); - mpz_add (t0, t0, t1); - } - //assert (mpz_even_p (t0) && mpz_even_p (s0)); - mpz_tdiv_q_2exp (s0, s0, 1); - mpz_tdiv_q_2exp (t0, t0, 1); - } - - /* Arrange so that |s| < |u| / 2g */ - mpz_add (s1, s0, s1); - if (mpz_cmpabs (s0, s1) > 0) - { - mpz_swap (s0, s1); - mpz_sub (t0, t0, t1); - } - if (u->_mp_size < 0) - mpz_neg (s0, s0); - if (v->_mp_size < 0) - mpz_neg (t0, t0); - - mpz_swap (g, tv); - if (s) - mpz_swap (s, s0); - if (t) - mpz_swap (t, t0); - - mpz_clear (tu); - mpz_clear (tv); - mpz_clear (s0); - mpz_clear (s1); - mpz_clear (t0); - mpz_clear (t1); -} - - -void -mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v) -{ - mpz_t t; - mpz_init_set_ui (t, v); - mpz_mul (t, u, t); - mpz_add (r, r, t); - mpz_clear (t); -} - - -// STRING CONVERSION - -unsigned -mpn_base_power_of_two_p (unsigned b) -{ - switch (b) - { - case 2: return 1; - case 4: return 2; - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - case 128: return 7; - case 256: return 8; - default: return 0; - } -} - - - -void -mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b) -{ - mp_limb_t m; - mp_limb_t p; - unsigned exp; - - m = GMP_LIMB_MAX / b; - for (exp = 1, p = b; p <= m; exp++) - p *= b; - - info->exp = exp; - info->bb = p; -} - -int isspace_gpu(unsigned char c) { - if (c == '\n' || c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v') - return 1; - return 0; -} - -int strlen_c(__global char *c) { - - // rather naive implementation – we assume a string is terminated, and is not 0 characters long. 
- - int i = 0; - while (1) { - if (c[i] == '\0') - return i; - i++; - } - return i; -} - -mp_size_t -mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn, - unsigned bits) -{ - mp_size_t rn; - mp_limb_t limb; - unsigned shift; - - for (limb = 0, rn = 0, shift = 0; sn-- > 0; ) - { - limb |= (mp_limb_t) sp[sn] << shift; - shift += bits; - if (shift >= GMP_LIMB_BITS) - { - shift -= GMP_LIMB_BITS; - rp[rn++] = limb; - /* Next line is correct also if shift == 0, - bits == 8, and mp_limb_t == unsigned char. */ - limb = (unsigned int) sp[sn] >> (bits - shift); - } - } - if (limb != 0) - rp[rn++] = limb; - else - rn = mpn_normalized_size (rp, rn); - return rn; -} - -mp_size_t -mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn, - mp_limb_t b, const struct mpn_base_info *info) -{ - mp_size_t rn; - mp_limb_t w; - unsigned k; - size_t j; - - assert (sn > 0); - - k = 1 + (sn - 1) % info->exp; - - j = 0; - w = sp[j++]; - while (--k != 0) - w = w * b + sp[j++]; - - rp[0] = w; - - for (rn = 1; j < sn;) - { - mp_limb_t cy; - - w = sp[j++]; - for (k = 1; k < info->exp; k++) - w = w * b + sp[j++]; - - cy = mpn_mul_1 (rp, rp, rn, info->bb); - cy += mpn_add_1 (rp, rp, rn, w); - if (cy > 0) - rp[rn++] = cy; - } - assert (j == sn); - - return rn; -} - - -int -mpz_set_str (mpz_t r, __global char *sp, int base) -{ - unsigned bits, value_of_a; - mp_size_t rn, alloc; - mp_ptr rp; - size_t dn, sn; - int sign; - unsigned char dp[256]; - - assert (base == 0 || (base >= 2 && base <= 62)); - - while (isspace_gpu( (unsigned char) *sp)) - sp++; - - sign = (*sp == '-'); - sp += sign; - - if (base == 0) - { - if (sp[0] == '0') - { - if (sp[1] == 'x' || sp[1] == 'X') - { - base = 16; - sp += 2; - } - else if (sp[1] == 'b' || sp[1] == 'B') - { - base = 2; - sp += 2; - } - else - base = 8; - } - else - base = 10; - } - - if (!*sp) - { - r->_mp_size = 0; - return -1; - } - sn = strlen_c(sp); - //dp = (unsigned char *) gmp_alloc (sn); - - - value_of_a = (base > 36) ? 
36 : 10; - for (dn = 0; *sp; sp++) - { - unsigned digit; - - if (isspace_gpu ((unsigned char) *sp)) - continue; - else if (*sp >= '0' && *sp <= '9') - digit = *sp - '0'; - else if (*sp >= 'a' && *sp <= 'z') - digit = *sp - 'a' + value_of_a; - else if (*sp >= 'A' && *sp <= 'Z') - digit = *sp - 'A' + 10; - else - digit = base; /* fail */ - - if (digit >= (unsigned) base) - { - //gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - - dp[dn++] = digit; - } - - if (!dn) - { - //gmp_free (dp, sn); - r->_mp_size = 0; - return -1; - } - bits = mpn_base_power_of_two_p (base); - - if (bits > 0) - { - alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_bits (rp, dp, dn, bits); - } - else - { - struct mpn_base_info info; - mpn_get_base_info (&info, base); - alloc = (dn + info.exp - 1) / info.exp; - rp = MPZ_REALLOC (r, alloc); - rn = mpn_set_str_other (rp, dp, dn, base, &info); - /* Normalization, needed for all-zero input. */ - assert (rn > 0); - rn -= rp[rn-1] == 0; - } - assert (rn <= alloc); - //gmp_free (dp, sn); - - r->_mp_size = sign ? 
- rn : rn; - - return 0; -} - - - -int -mpz_init_set_str (mpz_t r, __global char *sp, int base) -{ - mpz_init (r); - return mpz_set_str (r, sp, base); -} - - -// Montgomery multiplication - -void mont_prepare(mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x - ); - -void mont_product(mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ); - -void mont_modexp(mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ); - -void mont_finish(mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ); - -void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj); - -void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod); - - - - -void mont_prepare_even_modulus(mpz_t m, mpz_t q, mpz_t powj) { - - mpz_t two; // powj == 2^j - - mpz_init_set_ui(two, 2); - - mp_bitcnt_t j = mpz_scan1(m, 0); - - mpz_tdiv_q_2exp(q,m,j); - mpz_mul_2exp(powj,two,j - 1); - - mpz_clear(two); - -} - -// CPU -void mont_prepare(mpz_t b, mpz_t e, mpz_t m, - mpz_t r, mpz_t r_1, - mpz_t ni, mpz_t M, mpz_t x) { - - - // r and n (modulus) must be relatively prime (this is a given if n (modulus) is odd) - - // calculate r, which must be larger than the modulo and also a power of 2 - - mpz_t one, oo; // some helper variables - mpz_init_set_si(one,1); - mpz_init_set_si(oo,0); - - //unsigned long len = mpz_sizeinbase(m,2); - - unsigned long len = 2048; - - mpz_mul_2exp(r,one,len); - - mpz_set_si(one, 0); - - - mpz_gcdext(one, r_1, ni, r, m); // set r_1 and ni - - int sgn = mpz_sgn(r_1); - - mpz_abs(r_1, r_1); - mpz_abs(ni, ni); - - if (sgn == -1) { - mpz_sub(ni, r, ni); - mpz_sub(r_1, m, r_1); - } - - if (mpz_cmp_ui(one, 1)) - assert(0); - - mpz_mul(one, r, r_1); - mpz_mul(oo,ni,m); - - mpz_sub(one, one, oo); // oo must be one - - if (mpz_cmp_ui(one, 1)) - assert(0); - - mpz_mul(M, b, r); - mpz_mod(M, M, 
m); // set M - - mpz_mod(x, r, m); // set x - - - -} - -// maybe GPU? -// MARK: n MUST be an odd number -void mont_modexp(mpz_t ret, - mpz_t a, mpz_t e, - const mpz_t M, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) { - - mpz_t aa,xx; - - mpz_init_set(aa, M); - mpz_init_set(xx, a); - - int k = (int)mpz_sizeinbase(e,2); - - for (int i = k - 1; i >= 0; i--) { - - mont_product(xx, xx, xx, r, r_1, n, ni); - - if (mpz_tstbit(e, i)) - mont_product(xx, aa, xx, r, r_1, n, ni); - - } - - mpz_set(ret, xx); - -} - -void mont_finish(mpz_t ret, - const mpz_t xx, - const mpz_t n, const mpz_t ni, - const mpz_t r, const mpz_t r_1 - ) { - - - mpz_t x,one; - - mpz_init(x); - mpz_init_set_ui(one, 1); - - mont_product(x, xx, one, r, r_1, n, ni); - - mpz_set(ret, x); - - mpz_clear(x); - mpz_clear(one); - -} - - -// GPU -void mont_product(mpz_t ret, - const mpz_t a, const mpz_t b, - const mpz_t r, const mpz_t r_1, - const mpz_t n, const mpz_t ni - ) { - - mpz_t t,m,u; - - mpz_init(t); - mpz_init(m); - mpz_init(u); - - - - mont_mulmod(t, b, a, r); - - mont_mulmod(m, ni, t, r); - - mpz_t ab,mn; - - mpz_init(ab); - mpz_init(mn); - - mpz_mul(ab, a, b); - mpz_mul(mn, m, n); - - mpz_add(ab, ab, mn); - - unsigned long sz = mpz_sizeinbase(r,2) - 1; - mpz_tdiv_q_2exp(u, ab, sz); // this is essentially a bit shift, instead of a division - - if (mpz_cmp(u, n) >= 0) - mpz_sub(u, u, n); - - mpz_set(ret, u); - - - -} - -// not the fastest... 
// but it does not increase the variable sizes
/*
 * mont_mulmod - modular multiplication by binary shift-and-add.
 *
 * Walks b from its least significant bit upwards. For every set bit the
 * current multiple of a is added into res and res is reduced modulo mod;
 * after every bit that multiple is doubled and reduced modulo mod as well.
 * The operands are therefore reduced at each step instead of forming the
 * full product a * b first.
 *
 * res: receives the result. It is reset to zero in here, after a and b have
 *      been copied, so it does not have to be zero on entry.
 * a:   first factor; a private copy is reduced modulo mod before use.
 * b:   second factor; the loop runs only while the remaining value is > 0,
 *      so b <= 0 leaves res at zero.
 * mod: modulus handed to mpz_mod.
 */
void mont_mulmod(mpz_t res, const mpz_t a, const mpz_t b, const mpz_t mod) {

    mpz_t aa, bb;

    /* Private copies: the inputs are const and both copies are modified. */
    mpz_init_set(aa, a);
    mpz_init_set(bb, b);

    /* The loop below only ever adds into res, so start it from zero rather
       than from whatever the caller left in it. */
    mpz_set_si(res, 0);

    mpz_mod(aa, aa, mod); // in case a is bigger

    while (mpz_cmp_ui(bb, 0) > 0) {
        if (mpz_odd_p(bb)) {
            mpz_add(res, res, aa);
            mpz_mod(res, res, mod);
        }

        mpz_mul_2exp(aa, aa, 1);       /* next power-of-two multiple of a */
        mpz_mod(aa, aa, mod);
        mpz_tdiv_q_2exp(bb, bb, 1);    /* drop the bit just handled */
    }

    mpz_clear(aa);
    mpz_clear(bb);
}

/*
 * printmpz - debug helper that prints the limbs of n.
 *
 * Limbs are written in storage order (index 0 first), each with "%lu" and
 * without a separator, followed by two newlines. The sign is not printed.
 */
void printmpz(mpz_t n) {

    /* _mp_size is negative for negative values; its magnitude is the limb
       count. */
    mp_size_t limbs = GMP_ABS(n->_mp_size);

    for (mp_size_t i = 0; i < limbs; i++) {
        printf((char __constant *)"%lu", n->_mp_d[i]);
    }
    printf((char __constant *)"\n\n");

}

/*
 * The counter written by the kernel is shared by every work-item, so the
 * increment has to be atomic or concurrent updates are lost. 64-bit atomics
 * are an optional OpenCL extension; where the device does not offer them the
 * plain (racy) increment is kept so the kernel still builds.
 */
#ifdef cl_khr_int64_base_atomics
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#define GPUV_COUNT_MISMATCH(counter) ((void) atom_inc(counter))
#else
#define GPUV_COUNT_MISMATCH(counter) (*(counter) += 1)
#endif

/*
 * Returns buf advanced by off bytes. Subscripting a void pointer directly
 * only compiles as a compiler extension (which treats void as one byte
 * wide); going through a char pointer spells out the same arithmetic.
 */
__global void *gpuv_byte_offset(__global void *buf, unsigned long off) {
    return (__global void *)((__global char *)buf + off);
}

/*
 * montgomery - one work-item per signature.
 *
 * Each work-item loads its base and signature (selected by the work-item
 * index) and the exponent and modulus of its key (selected by pk), runs
 * mont_prepare / mont_modexp / mont_finish on them and compares the result
 * with the signature.
 *
 * signature, s_offsets: packed signatures and the offset of each one.
 * exponent,  e_offsets: packed exponents and the offset of each one.
 * modulus,   m_offsets: packed moduli and the offset of each one.
 * base,      b_offsets: packed bases and the offset of each one.
 * valid: shared counter; incremented once for every work-item whose
 *        recomputed value differs from its signature, i.e. it counts
 *        mismatches.
 * pks:   key lookup table; the key used is the first pk with
 *        pks[pk] >= index.
 * n:     not read by the kernel.
 *
 * NOTE(review): n looks like the element count, in which case work-items
 * with index >= n should return early - confirm against the host code.
 * NOTE(review): the pks search has no upper bound; it relies on the host
 * providing an entry >= every index.
 * NOTE(review): b, sig, e and m are handed to mpz_set_lg without mpz_init
 * and through an (unsigned long *) cast. mpz_set_lg is defined outside this
 * view, so those calls are kept exactly as they were.
 */
__kernel void montgomery(__global void *signature, __global unsigned long *s_offsets,
                         __global void *exponent, __global unsigned long *e_offsets,
                         __global void *modulus, __global unsigned long *m_offsets,
                         __global void *base, __global unsigned long *b_offsets,
                         __global unsigned long *valid,
                         __global unsigned long *pks,
                         unsigned long n)
{
    /* get_global_id returns size_t; keep the full width. */
    size_t index = get_global_id(0);

    /* Select the key this signature belongs to. */
    int pk = 0;
    while (pks[pk] < index)
        pk++;

    mpz_t b, e, m, sig, res;
    mpz_init(res);

    mpz_set_lg((unsigned long *)b, gpuv_byte_offset(base, b_offsets[index])); // this is sacrilegious really...

    mpz_set_lg((unsigned long *)sig, gpuv_byte_offset(signature, s_offsets[index]));

    mpz_set_lg((unsigned long *)e, gpuv_byte_offset(exponent, e_offsets[pk]));
    mpz_set_lg((unsigned long *)m, gpuv_byte_offset(modulus, m_offsets[pk])); // n

    mpz_t r, r_1, ni, M, x;
    mpz_init(r);
    mpz_init(r_1);
    mpz_init(ni);
    mpz_init(M);
    mpz_init(x);

    mpz_t xx;
    mpz_init(xx);

    // MARK: prepare might not have to run individually on each kernel (prepare might even run on CPU)
    mont_prepare(b, e, m, r, r_1, ni, M, x);
    mont_modexp(xx, x, e, M, m, ni, r, r_1);
    mont_finish(res, xx, m, ni, r, r_1);

    if (mpz_cmp(sig, res) != 0) {
        GPUV_COUNT_MISMATCH(valid);
    }
}