libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit ceca94318f8347d9141f1e833f1d7b0445430643
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Wed, 18 Oct 2023 13:47:36 +0200

initial commit

Diffstat:
A.DS_Store | 0
A.gitignore | 5+++++
Aopenssl/CMakeLists.txt | 28++++++++++++++++++++++++++++
Aopenssl/main.c | 93+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aopenssl/openssl-test.c | 3087+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aopenssl/openssl-test.h | 531+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aopenssl/readme.txt | 25+++++++++++++++++++++++++
Aother/.DS_Store | 0
Aother/CMakeLists.txt | 16++++++++++++++++
Aother/Main.c | 20++++++++++++++++++++
Aother/readme.txt | 32++++++++++++++++++++++++++++++++
Asource/.DS_Store | 0
Asource/big-int-test.c | 1080+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/big-int-test.h | 127+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/lib-gpu-verify.c | 589+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/opencl-test.c | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/opencl-test.h | 25+++++++++++++++++++++++++
Asource/rsa-test.c | 276+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/rsa-test.h | 27+++++++++++++++++++++++++++
Axcode/.DS_Store | 0
Axcode/ReadMe.txt | 2++
Axcode/lib-gpu-verify.xcodeproj/project.pbxproj | 328+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Axcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata | 7+++++++
Axcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist | 8++++++++
Axcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate | 0
Axcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Axcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist | 684+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Axcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist | 22++++++++++++++++++++++
Axcode/modexp.cl | 5+++++
Axcode/rsa-kernel.cl | 9+++++++++
30 files changed, 7316 insertions(+), 0 deletions(-)

diff --git a/.DS_Store b/.DS_Store Binary files differ. diff --git a/.gitignore b/.gitignore @@ -0,0 +1,5 @@ +# ignore build in openssl +openssl/build + +# ignore in other +other/build diff --git a/openssl/CMakeLists.txt b/openssl/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.1) # 3.1 << C_STANDARD 11 + +set(CMAKE_TRY_COMPILE_TARGET_TYPE "STATIC_LIBRARY") + +project(openssltest LANGUAGES C) + +set(SOURCES + ./openssl-test.h + ./openssl-test.c +) + +add_executable(${PROJECT_NAME} main.c ${SOURCES}) + +# paths for ubuntu, they might be different on other systems. + +target_include_directories(openssltest PRIVATE /usr/include) +target_include_directories(openssltest PRIVATE /usr/lib) + +ADD_LIBRARY(libgcrypt STATIC IMPORTED) +SET_TARGET_PROPERTIES(libgcrypt PROPERTIES IMPORTED_LOCATION /usr/lib/x86_64-linux-gnu/libgcrypt.so.20) +target_link_libraries(openssltest libgcrypt) + +set_target_properties(${PROJECT_NAME} PROPERTIES C_STANDARD 11 + C_STANDARD_REQUIRED ON + C_EXTENSIONS OFF) + +target_compile_definitions(${PROJECT_NAME} PUBLIC) +\ No newline at end of file diff --git a/openssl/main.c b/openssl/main.c @@ -0,0 +1,92 @@ + +#include "openssl-test.h" + +#include <gcrypt.h> + + +#define NEED_LIBGCRYPT_VERSION "1.10.2" +int main(int argc, char** argv) +{ + + // disable any optimisations + gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL); + + /* Version check should be the very first call because it + makes sure that important subsystems are initialized. + #define NEED_LIBGCRYPT_VERSION to the minimum required version. */ + if (!gcry_check_version (NEED_LIBGCRYPT_VERSION)) + { + fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n", + NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL)); + exit (2); } + /* Disable secure memory. */ + gcry_control (GCRYCTL_DISABLE_SECMEM, 0); + /* ... If required, other initialization goes here. */ + /* Tell Libgcrypt that initialization has completed. 
*/ + gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0); + + + char *template = "(genkey(rsa(nbits 4:2048)))"; + gcry_sexp_t parms; + + gcry_sexp_new(&parms, template, strlen(template), 1); + + // will contain key material + gcry_sexp_t key; + + gcry_pk_genkey(&key,parms); + + // create a number, we want to encrypt it + char *val = "1234567890ABCDEF"; + gcry_mpi_t mpi = gcry_mpi_new((int)strlen(val) * 8); + size_t scanned = 0; + + gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned); + + gcry_sexp_t toSign; + size_t errOff = 0; + char *dataformat = "(data (flags raw) (value %m))"; + + gcry_sexp_build(&toSign,&errOff,dataformat,mpi); + + // -- sign -- + + gcry_sexp_t resSign; + + // should not use padding + gcry_pk_sign(&resSign, toSign, key); + + // these must be freed manually + gcry_mpi_t n_mpi; + gcry_mpi_t e_mpi; + gcry_mpi_t d_mpi; + + gcry_sexp_extract_param(key,NULL,"n e d",&n_mpi, &e_mpi, &d_mpi, NULL); + + gcry_mpi_t sig_mpi; + + gcry_sexp_extract_param(resSign,NULL,"s",&sig_mpi, NULL); + + int buflen = 2048; + + // may be a lot shorter – these will contain the numbers in HEX string form – for use + unsigned char *n = malloc(buflen); + unsigned char *e = malloc(buflen); + unsigned char *d = malloc(buflen); + size_t nL = 0; + + // check returns + gcry_mpi_print(GCRYMPI_FMT_HEX,n,buflen,&nL,n_mpi); + gcry_mpi_print(GCRYMPI_FMT_HEX,e,buflen,&nL,e_mpi); + gcry_mpi_print(GCRYMPI_FMT_HEX,d,buflen,&nL,d_mpi); + + unsigned char *sgn = malloc(buflen); + gcry_mpi_print(GCRYMPI_FMT_HEX,sgn,buflen,&nL,sig_mpi); + + // a fresh key is generated every time the program is run, so the calculated inputs change on each run; keep this in mind when debugging. + test(e,d,n); + + // here, we should free the mpis + + return 0; +} +\ No newline at end of file diff --git a/openssl/openssl-test.c b/openssl/openssl-test.c @@ -0,0 +1,3087 @@ +// +// openssl-test.c +// hello +// +// Created by Cedric Zwahlen on 07.10.2023. 
+// + +#include "openssl-test.h" + +#include <stdlib.h> +#include <string.h> + +typedef struct ossl_ex_data_global_st { + CRYPTO_RWLOCK *ex_data_lock; + EX_CALLBACKS ex_data[CRYPTO_EX_INDEX__COUNT]; +} OSSL_EX_DATA_GLOBAL; + + + + +struct ossl_lib_ctx_st { + CRYPTO_RWLOCK *lock, *rand_crngt_lock; + OSSL_EX_DATA_GLOBAL global; + + void *property_string_data; + void *evp_method_store; + void *provider_store; + void *namemap; + void *property_defns; + void *global_properties; + void *drbg; + void *drbg_nonce; +/*#ifndef FIPS_MODULE + void *provider_conf; + void *bio_core; + void *child_provider; + OSSL_METHOD_STORE *decoder_store; + void *decoder_cache; + OSSL_METHOD_STORE *encoder_store; + OSSL_METHOD_STORE *store_loader_store; + void *self_test_cb; +#endif*/ +#if defined(OPENSSL_THREADS) + void *threads; +#endif + void *rand_crngt; +#ifdef FIPS_MODULE + void *thread_event_handler; + void *fips_prov; +#endif + + unsigned int ischild:1; +}; + + + + + + + +/* +typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from, + void *from_d, int idx, long argl, void *argp); +__owur int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp, + CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, + CRYPTO_EX_free *free_func); +*/ + +struct crypto_ex_data_st { + //STACK_OF(void) *sk; + void * sk; +}; +//DEFINE_STACK_OF(void) + +typedef struct bn_mont_ctx_st BN_MONT_CTX; +struct bn_mont_ctx_st { + int ri; /* number of bits in R */ + BIGNUM RR; /* used to convert to montgomery form, + possibly zero-padded */ + BIGNUM N; /* The modulus */ + BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 (Ni is only + * stored for bignum algorithm) */ + BN_ULONG n0[2]; /* least significant word(s) of Ni; (type + * changed with 0.9.9, was "BN_ULONG n0;" + * before) */ + int 
flags; +}; + + +int BN_get_flags(const BIGNUM *b, int n) +{ + return b->flags & n; +} + +int BN_is_zero(const BIGNUM *a) +{ + return a->top == 0; +} + +int BN_num_bits_word(BN_ULONG l) +{ + BN_ULONG x, mask; + int bits = (l != 0); + +#if BN_BITS2 > 32 + x = l >> 32; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 32 & mask; + l ^= (x ^ l) & mask; +#endif + + x = l >> 16; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 16 & mask; + l ^= (x ^ l) & mask; + + x = l >> 8; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 8 & mask; + l ^= (x ^ l) & mask; + + x = l >> 4; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 4 & mask; + l ^= (x ^ l) & mask; + + x = l >> 2; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 2 & mask; + l ^= (x ^ l) & mask; + + x = l >> 1; + mask = (0 - x) & BN_MASK2; + mask = (0 - (mask >> (BN_BITS2 - 1))); + bits += 1 & mask; + + return bits; +} + +int BN_num_bits(const BIGNUM *a) +{ + int i = a->top - 1; + bn_check_top(a); + + if (BN_is_zero(a)) + return 0; + + return ((i * BN_BITS2) + BN_num_bits_word(a->d[i])); +} + +void CRYPTO_free(void *str, const char *file, int line) +{/* + INCREMENT(free_count); + if (free_impl != CRYPTO_free) { + free_impl(str, file, line); + return; + } +*/ + free(str); +} + +void CRYPTO_clear_free(void *str, size_t num, const char *file, int line) +{ + if (str == NULL) + return; + if (num) + {} // OPENSSL_cleanse(str, num); + CRYPTO_free(str, file, line); +} + +static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) +{ + BN_ULONG *a = NULL; + + if (words > (INT_MAX / (4 * BN_BITS2))) { + // ERR_raise(ERR_LIB_BN, BN_R_BIGNUM_TOO_LONG); + assert(0); + return NULL; + } + + if (BN_get_flags(b, BN_FLG_SECURE)) + {} // a = OPENSSL_secure_zalloc(words * sizeof(*a)); + else + a = OPENSSL_zalloc(words * sizeof(*a)); + if (a == NULL) + return NULL; + + assert(b->top 
<= words); + if (b->top > 0) + memcpy(a, b->d, sizeof(*a) * b->top); + + return a; +} + +static void bn_free_d(BIGNUM *a, int clear) +{ + // if (BN_get_flags(a, BN_FLG_SECURE)) + // OPENSSL_secure_clear_free(a->d, a->dmax * sizeof(a->d[0])); + /*else*/ //if (clear != 0) + // OPENSSL_clear_free(a->d, a->dmax * sizeof(a->d[0])); + // else + OPENSSL_free(a->d); +} + + +BIGNUM *bn_expand2(BIGNUM *b, int words) +{ + if (words > b->dmax) { + BN_ULONG *a = bn_expand_internal(b, words); + if (!a) + return NULL; + if (b->d != NULL) + bn_free_d(b, 1); + b->d = a; + b->dmax = words; + } + + return b; +} + +static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) +{ + if (bits > (INT_MAX - BN_BITS2 + 1)) + return NULL; + + if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) + return a; + + return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); +} + +int BN_set_word(BIGNUM *a, BN_ULONG w) +{ + bn_check_top(a); + if (bn_expand(a, (int)sizeof(BN_ULONG) * 8) == NULL) + return 0; + a->neg = 0; + a->d[0] = w; + a->top = (w ? 1 : 0); + a->flags &= ~BN_FLG_FIXED_TOP; + bn_check_top(a); + return 1; +} + + +int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w) +{ + return ((a->top == 1) && (a->d[0] == w)) || ((w == 0) && (a->top == 0)); +} + + + + + +void BN_CTX_start(BN_CTX *ctx) +{ + // CTXDBG("ENTER BN_CTX_start()", ctx); + /* If we're already overflowing ... */ + if (ctx->err_stack || ctx->too_many) + ctx->err_stack++; + /* (Try to) get a new frame pointer */ + else if (!BN_STACK_push(&ctx->stack, ctx->used)) { + // ERR_raise(ERR_LIB_BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES); + assert(0); + ctx->err_stack++; + } + // CTXDBG("LEAVE BN_CTX_start()", ctx); +} + +BIGNUM *BN_CTX_get(BN_CTX *ctx) +{ + BIGNUM *ret; + + // CTXDBG("ENTER BN_CTX_get()", ctx); + if (ctx->err_stack || ctx->too_many) + return NULL; + if ((ret = BN_POOL_get(&ctx->pool, ctx->flags)) == NULL) { + /* + * Setting too_many prevents repeated "get" attempts from cluttering + * the error stack. 
+ */ + ctx->too_many = 1; + // ERR_raise(ERR_LIB_BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES); + assert(0); + return NULL; + } + /* OK, make sure the returned bignum is "zero" */ + BN_zero(ret); + /* clear BN_FLG_CONSTTIME if leaked from previous frames */ + ret->flags &= (~BN_FLG_CONSTTIME); + ctx->used++; + // CTXDBG("LEAVE BN_CTX_get()", ctx); + return ret; +} + +void bn_init(BIGNUM *a) +{ + static BIGNUM nilbn; + + *a = nilbn; + bn_check_top(a); +} + +void BN_MONT_CTX_init(BN_MONT_CTX *ctx) +{ + ctx->ri = 0; + bn_init(&ctx->RR); + bn_init(&ctx->N); + bn_init(&ctx->Ni); + ctx->n0[0] = ctx->n0[1] = 0; + ctx->flags = 0; +} + +BN_MONT_CTX *BN_MONT_CTX_new(void) +{ + BN_MONT_CTX *ret; + + if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) + return NULL; + + BN_MONT_CTX_init(ret); + ret->flags = BN_FLG_MALLOCED; + return ret; +} + +int BN_ucmp(const BIGNUM *a, const BIGNUM *b) +{ + int i; + BN_ULONG t1, t2, *ap, *bp; + + bn_check_top(a); + bn_check_top(b); + + i = a->top - b->top; + if (i != 0) + return i; + ap = a->d; + bp = b->d; + for (i = a->top - 1; i >= 0; i--) { + t1 = ap[i]; + t2 = bp[i]; + if (t1 != t2) + return ((t1 > t2) ? 1 : -1); + } + return 0; +} + +BIGNUM *bn_wexpand(BIGNUM *a, int words) +{ + return (words <= a->dmax) ? 
a : bn_expand2(a, words); +} + +BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int n) +{ + BN_ULONG t1, t2; + int c = 0; + + assert(n >= 0); + if (n <= 0) + return (BN_ULONG)0; + +#ifndef OPENSSL_SMALL_FOOTPRINT + while (n & ~3) { + t1 = a[0]; + t2 = (t1 - c) & BN_MASK2; + c = (t2 > t1); + t1 = b[0]; + t1 = (t2 - t1) & BN_MASK2; + r[0] = t1; + c += (t1 > t2); + t1 = a[1]; + t2 = (t1 - c) & BN_MASK2; + c = (t2 > t1); + t1 = b[1]; + t1 = (t2 - t1) & BN_MASK2; + r[1] = t1; + c += (t1 > t2); + t1 = a[2]; + t2 = (t1 - c) & BN_MASK2; + c = (t2 > t1); + t1 = b[2]; + t1 = (t2 - t1) & BN_MASK2; + r[2] = t1; + c += (t1 > t2); + t1 = a[3]; + t2 = (t1 - c) & BN_MASK2; + c = (t2 > t1); + t1 = b[3]; + t1 = (t2 - t1) & BN_MASK2; + r[3] = t1; + c += (t1 > t2); + a += 4; + b += 4; + r += 4; + n -= 4; + } +#endif + while (n) { + t1 = a[0]; + t2 = (t1 - c) & BN_MASK2; + c = (t2 > t1); + t1 = b[0]; + t1 = (t2 - t1) & BN_MASK2; + r[0] = t1; + c += (t1 > t2); + a++; + b++; + r++; + n--; + } + return c; +} + + +int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int max, min, dif; + BN_ULONG t1, t2, borrow, *rp; + const BN_ULONG *ap, *bp; + + bn_check_top(a); + bn_check_top(b); + + max = a->top; + min = b->top; + dif = max - min; + + if (dif < 0) { /* hmm... 
should not be happening */ + //ERR_raise(ERR_LIB_BN, BN_R_ARG2_LT_ARG3); + assert(0); + return 0; + } + + if (bn_wexpand(r, max) == NULL) + return 0; + + ap = a->d; + bp = b->d; + rp = r->d; + + borrow = bn_sub_words(rp, ap, bp, min); + ap += min; + rp += min; + + while (dif) { + dif--; + t1 = *(ap++); + t2 = (t1 - borrow) & BN_MASK2; + *(rp++) = t2; + borrow &= (t1 == 0); + } + + while (max && *--rp == 0) + max--; + + r->top = max; + r->neg = 0; + bn_pollute(r); + + return 1; +} + +#ifdef BN_LLONG +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int n) +{ + BN_ULLONG ll = 0; + + assert(n >= 0); + if (n <= 0) + return (BN_ULONG)0; + +# ifndef OPENSSL_SMALL_FOOTPRINT + while (n & ~3) { + ll += (BN_ULLONG) a[0] + b[0]; + r[0] = (BN_ULONG)ll & BN_MASK2; + ll >>= BN_BITS2; + ll += (BN_ULLONG) a[1] + b[1]; + r[1] = (BN_ULONG)ll & BN_MASK2; + ll >>= BN_BITS2; + ll += (BN_ULLONG) a[2] + b[2]; + r[2] = (BN_ULONG)ll & BN_MASK2; + ll >>= BN_BITS2; + ll += (BN_ULLONG) a[3] + b[3]; + r[3] = (BN_ULONG)ll & BN_MASK2; + ll >>= BN_BITS2; + a += 4; + b += 4; + r += 4; + n -= 4; + } +# endif + while (n) { + ll += (BN_ULLONG) a[0] + b[0]; + r[0] = (BN_ULONG)ll & BN_MASK2; + ll >>= BN_BITS2; + a++; + b++; + r++; + n--; + } + return (BN_ULONG)ll; +} +#else /* !BN_LLONG */ +BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, + int n) +{ + BN_ULONG c, l, t; + + assert(n >= 0); + if (n <= 0) + return (BN_ULONG)0; + + c = 0; +# ifndef OPENSSL_SMALL_FOOTPRINT + while (n & ~3) { + t = a[0]; + t = (t + c) & BN_MASK2; + c = (t < c); + l = (t + b[0]) & BN_MASK2; + c += (l < t); + r[0] = l; + t = a[1]; + t = (t + c) & BN_MASK2; + c = (t < c); + l = (t + b[1]) & BN_MASK2; + c += (l < t); + r[1] = l; + t = a[2]; + t = (t + c) & BN_MASK2; + c = (t < c); + l = (t + b[2]) & BN_MASK2; + c += (l < t); + r[2] = l; + t = a[3]; + t = (t + c) & BN_MASK2; + c = (t < c); + l = (t + b[3]) & BN_MASK2; + c += (l < t); + r[3] = l; + a += 4; + b += 4; + r += 4; + n 
-= 4; + } +# endif + while (n) { + t = a[0]; + t = (t + c) & BN_MASK2; + c = (t < c); + l = (t + b[0]) & BN_MASK2; + c += (l < t); + r[0] = l; + a++; + b++; + r++; + n--; + } + return (BN_ULONG)c; +} +#endif /* !BN_LLONG */ + +/* unsigned add of b to a, r can be equal to a or b. */ +int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int max, min, dif; + const BN_ULONG *ap, *bp; + BN_ULONG *rp, carry, t1, t2; + + bn_check_top(a); + bn_check_top(b); + + if (a->top < b->top) { + const BIGNUM *tmp; + + tmp = a; + a = b; + b = tmp; + } + max = a->top; + min = b->top; + dif = max - min; + + if (bn_wexpand(r, max + 1) == NULL) + return 0; + + r->top = max; + + ap = a->d; + bp = b->d; + rp = r->d; + + carry = bn_add_words(rp, ap, bp, min); + rp += min; + ap += min; + + while (dif) { + dif--; + t1 = *(ap++); + t2 = (t1 + carry) & BN_MASK2; + *(rp++) = t2; + carry &= (t2 == 0); + } + *rp = carry; + r->top += carry; + + r->neg = 0; + bn_check_top(r); + return 1; +} + +int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int ret, r_neg, cmp_res; + + bn_check_top(a); + bn_check_top(b); + + if (a->neg != b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = !b->neg; + ret = BN_usub(r, b, a); + } else { + r_neg = 0; + BN_zero(r); + ret = 1; + } + } + + r->neg = r_neg; + bn_check_top(r); + return ret; +} + +int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) +{ + int ret, r_neg, cmp_res; + + bn_check_top(a); + bn_check_top(b); + + if (a->neg == b->neg) { + r_neg = a->neg; + ret = BN_uadd(r, a, b); + } else { + cmp_res = BN_ucmp(a, b); + if (cmp_res > 0) { + r_neg = a->neg; + ret = BN_usub(r, a, b); + } else if (cmp_res < 0) { + r_neg = b->neg; + ret = BN_usub(r, b, a); + } else { + r_neg = 0; + BN_zero(r); + ret = 1; + } + } + + r->neg = r_neg; + bn_check_top(r); + return ret; +} + +int BN_nnmod(BIGNUM *r, const BIGNUM 
*m, const BIGNUM *d, BN_CTX *ctx) +{ + /* + * like BN_mod, but returns non-negative remainder (i.e., 0 <= r < |d| + * always holds) + */ + + if (!(BN_mod(r, m, d, ctx))) + return 0; + if (!r->neg) + return 1; + /* now -|d| < r < 0, so we have to set r := r + |d| */ + return (d->neg ? BN_sub : BN_add) (r, r, d); +} + +BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) +{ + int bn_words; + + bn_check_top(b); + + bn_words = BN_get_flags(b, BN_FLG_CONSTTIME) ? b->dmax : b->top; + + if (a == b) + return a; + if (bn_wexpand(a, bn_words) == NULL) + return NULL; + + if (b->top > 0) + memcpy(a->d, b->d, sizeof(b->d[0]) * bn_words); + + a->neg = b->neg; + a->top = b->top; + a->flags |= b->flags & BN_FLG_FIXED_TOP; + bn_check_top(a); + return a; +} + +static int bn_left_align(BIGNUM *num) +{ + BN_ULONG *d = num->d, n, m, rmask; + int top = num->top; + int rshift = BN_num_bits_word(d[top - 1]), lshift, i; + + lshift = BN_BITS2 - rshift; + rshift %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rshift; /* rmask = 0 - (rshift != 0) */ + rmask |= rmask >> 8; + + for (i = 0, m = 0; i < top; i++) { + n = d[i]; + d[i] = ((n << lshift) | m) & BN_MASK2; + m = (n >> rshift) & rmask; + } + + return lshift; +} + +/* + * In respect to shift factor the execution time is invariant of + * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition + * for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being + * non-secret. 
+ */ +int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, rmask = 0; + + assert(n >= 0); + + bn_check_top(r); + bn_check_top(a); + + nw = n / BN_BITS2; + if (bn_wexpand(r, a->top + nw + 1) == NULL) + return 0; + + if (a->top != 0) { + lb = (unsigned int)n % BN_BITS2; + rb = BN_BITS2 - lb; + rb %= BN_BITS2; /* say no to undefined behaviour */ + rmask = (BN_ULONG)0 - rb; /* rmask = 0 - (rb != 0) */ + rmask |= rmask >> 8; + f = &(a->d[0]); + t = &(r->d[nw]); + l = f[a->top - 1]; + t[a->top] = (l >> rb) & rmask; + for (i = a->top - 1; i > 0; i--) { + m = l << lb; + l = f[i - 1]; + t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2; + } + t[0] = (l << lb) & BN_MASK2; + } else { + /* shouldn't happen, but formally required */ + r->d[nw] = 0; + } + if (nw != 0) + memset(r->d, 0, sizeof(*t) * nw); + + r->neg = a->neg; + r->top = a->top + nw + 1; + r->flags |= BN_FLG_FIXED_TOP; + + return 1; +} + +#if defined(BN_LLONG) && defined(BN_DIV2W) + +BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) +{ + return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d)); +} + +#else + +/* Divide h,l by d and return the result. 
*/ +/* I need to test this some more :-( */ +BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) +{ + BN_ULONG dh, dl, q, ret = 0, th, tl, t; + int i, count = 2; + + if (d == 0) + return BN_MASK2; + + i = BN_num_bits_word(d); + assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); + + i = BN_BITS2 - i; + if (h >= d) + h -= d; + + if (i) { + d <<= i; + h = (h << i) | (l >> (BN_BITS2 - i)); + l <<= i; + } + dh = (d & BN_MASK2h) >> BN_BITS4; + dl = (d & BN_MASK2l); + for (;;) { + if ((h >> BN_BITS4) == dh) + q = BN_MASK2l; + else + q = h / dh; + + th = q * dh; + tl = dl * q; + for (;;) { + t = h - th; + if ((t & BN_MASK2h) || + ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) + break; + q--; + th -= dh; + tl -= dl; + } + t = (tl >> BN_BITS4); + tl = (tl << BN_BITS4) & BN_MASK2h; + th += t; + + if (l < tl) + th++; + l -= tl; + if (h < th) { + h += d; + q--; + } + h -= th; + + if (--count == 0) + break; + + ret = q << BN_BITS4; + h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2; + l = (l & BN_MASK2l) << BN_BITS4; + } + ret |= q; + return ret; +} +#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ + + + + +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) +{ + BN_ULONG carry = 0; + BN_ULONG bl, bh; + + assert(num >= 0); + if (num <= 0) + return (BN_ULONG)0; + + bl = LBITS(w); + bh = HBITS(w); + +# ifndef OPENSSL_SMALL_FOOTPRINT + while (num & ~3) { + mul(rp[0], ap[0], bl, bh, carry); + mul(rp[1], ap[1], bl, bh, carry); + mul(rp[2], ap[2], bl, bh, carry); + mul(rp[3], ap[3], bl, bh, carry); + ap += 4; + rp += 4; + num -= 4; + } +# endif + while (num) { + mul(rp[0], ap[0], bl, bh, carry); + ap++; + rp++; + num--; + } + return carry; +} + +void BN_CTX_end(BN_CTX *ctx) +{ + if (ctx == NULL) + return; + // CTXDBG("ENTER BN_CTX_end()", ctx); + if (ctx->err_stack) + ctx->err_stack--; + else { + unsigned int fp = BN_STACK_pop(&ctx->stack); + /* Does this stack frame have anything to release? 
*/ + if (fp < ctx->used) + BN_POOL_release(&ctx->pool, ctx->used - fp); + ctx->used = fp; + /* Unjam "too_many" in case "get" had failed */ + ctx->too_many = 0; + } + // CTXDBG("LEAVE BN_CTX_end()", ctx); +} + +int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n) +{ + int i, top, nw; + unsigned int lb, rb; + BN_ULONG *t, *f; + BN_ULONG l, m, mask; + + bn_check_top(r); + bn_check_top(a); + + assert(n >= 0); + + nw = n / BN_BITS2; + if (nw >= a->top) { + /* shouldn't happen, but formally required */ + BN_zero(r); + return 1; + } + + rb = (unsigned int)n % BN_BITS2; + lb = BN_BITS2 - rb; + lb %= BN_BITS2; /* say no to undefined behaviour */ + mask = (BN_ULONG)0 - lb; /* mask = 0 - (lb != 0) */ + mask |= mask >> 8; + top = a->top - nw; + if (r != a && bn_wexpand(r, top) == NULL) + return 0; + + t = &(r->d[0]); + f = &(a->d[nw]); + l = f[0]; + for (i = 0; i < top - 1; i++) { + m = f[i + 1]; + t[i] = (l >> rb) | ((m << lb) & mask); + l = m; + } + t[i] = l >> rb; + + r->neg = a->neg; + r->top = top; + r->flags |= BN_FLG_FIXED_TOP; + + return 1; +} + +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, + BN_ULONG w) +{ + BN_ULONG c = 0; + BN_ULONG bl, bh; + + assert(num >= 0); + if (num <= 0) + return (BN_ULONG)0; + + bl = LBITS(w); + bh = HBITS(w); + +# ifndef OPENSSL_SMALL_FOOTPRINT + while (num & ~3) { + mul_add(rp[0], ap[0], bl, bh, c); + mul_add(rp[1], ap[1], bl, bh, c); + mul_add(rp[2], ap[2], bl, bh, c); + mul_add(rp[3], ap[3], bl, bh, c); + ap += 4; + rp += 4; + num -= 4; + } +# endif + while (num) { + mul_add(rp[0], ap[0], bl, bh, c); + ap++; + rp++; + num--; + } + return c; +} + + + +/* + * It's argued that *length* of *significant* part of divisor is public. + * Even if it's private modulus that is. Again, *length* is assumed + * public, but not *value*. Former is likely to be pre-defined by + * algorithm with bit granularity, though below subroutine is invariant + * of limb length. 
Thanks to this assumption we can require that |divisor| + * may not be zero-padded, yet claim this subroutine "constant-time"(*). + * This is because zero-padded dividend, |num|, is tolerated, so that + * caller can pass dividend of public length(*), but with smaller amount + * of significant limbs. This naturally means that quotient, |dv|, would + * contain correspondingly fewer significant limbs as well, and will be zero- + * padded accordingly. Returned remainder, |rm|, will have same bit length + * as divisor, also zero-padded if needed. These actually leave sign bits + * in ambiguous state. In sense that we try to avoid negative zeros, while + * zero-padded zeros would retain sign. + * + * (*) "Constant-time-ness" has two pre-conditions: + * + * - availability of constant-time bn_div_3_words; + * - dividend is at least as "wide" as divisor, limb-wise, zero-padded + * if so required, which shouldn't be a privacy problem, because + * divisor's length is considered public; + */ +int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, + const BIGNUM *divisor, BN_CTX *ctx) +{ + int norm_shift, i, j, loop; + BIGNUM *tmp, *snum, *sdiv, *res; + BN_ULONG *resp, *wnum, *wnumtop; + BN_ULONG d0, d1; + int num_n, div_n, num_neg; + + assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0); + + bn_check_top(num); + bn_check_top(divisor); + bn_check_top(dv); + bn_check_top(rm); + + BN_CTX_start(ctx); + res = (dv == NULL) ? BN_CTX_get(ctx) : dv; + tmp = BN_CTX_get(ctx); + snum = BN_CTX_get(ctx); + sdiv = BN_CTX_get(ctx); + if (sdiv == NULL) + goto err; + + /* First we normalise the numbers */ + if (!BN_copy(sdiv, divisor)) + goto err; + norm_shift = bn_left_align(sdiv); + sdiv->neg = 0; + /* + * Note that bn_lshift_fixed_top's output is always one limb longer + * than input, even when norm_shift is zero. 
This means that amount of + * inner loop iterations is invariant of dividend value, and that one + * doesn't need to compare dividend and divisor if they were originally + * of the same bit length. + */ + if (!(bn_lshift_fixed_top(snum, num, norm_shift))) + goto err; + + div_n = sdiv->top; + num_n = snum->top; + + if (num_n <= div_n) { + /* caller didn't pad dividend -> no constant-time guarantee... */ + if (bn_wexpand(snum, div_n + 1) == NULL) + goto err; + memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG)); + snum->top = num_n = div_n + 1; + } + + loop = num_n - div_n; + /* + * Lets setup a 'window' into snum This is the part that corresponds to + * the current 'area' being divided + */ + wnum = &(snum->d[loop]); + wnumtop = &(snum->d[num_n - 1]); + + /* Get the top 2 words of sdiv */ + d0 = sdiv->d[div_n - 1]; + d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2]; + + /* Setup quotient */ + if (!bn_wexpand(res, loop)) + goto err; + num_neg = num->neg; + res->neg = (num_neg ^ divisor->neg); + res->top = loop; + res->flags |= BN_FLG_FIXED_TOP; + resp = &(res->d[loop]); + + /* space for temp */ + if (!bn_wexpand(tmp, (div_n + 1))) + goto err; + + for (i = 0; i < loop; i++, wnumtop--) { + BN_ULONG q, l0; + /* + * the first part of the loop uses the top two words of snum and sdiv + * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv + */ +# if defined(BN_DIV3W) + q = bn_div_3_words(wnumtop, d1, d0); +# else + BN_ULONG n0, n1, rem = 0; + + n0 = wnumtop[0]; + n1 = wnumtop[-1]; + if (n0 == d0) + q = BN_MASK2; + else { /* n0 < d0 */ + BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2]; +# ifdef BN_LLONG + BN_ULLONG t2; + +# if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words) + q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0); +# else + q = bn_div_words(n0, n1, d0); +# endif + +# ifndef REMAINDER_IS_ALREADY_CALCULATED + /* + * rem doesn't have to be BN_ULLONG. The least we + * know it's less that d0, isn't it? 
+ */ + rem = (n1 - q * d0) & BN_MASK2; +# endif + t2 = (BN_ULLONG) d1 *q; + + for (;;) { + if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2)) + break; + q--; + rem += d0; + if (rem < d0) + break; /* don't let rem overflow */ + t2 -= d1; + } +# else /* !BN_LLONG */ + BN_ULONG t2l, t2h; + + q = bn_div_words(n0, n1, d0); +# ifndef REMAINDER_IS_ALREADY_CALCULATED + rem = (n1 - q * d0) & BN_MASK2; +# endif + +# if defined(BN_UMULT_LOHI) + BN_UMULT_LOHI(t2l, t2h, d1, q); +# elif defined(BN_UMULT_HIGH) + t2l = d1 * q; + t2h = BN_UMULT_HIGH(d1, q); +# else + { + BN_ULONG ql, qh; + t2l = LBITS(d1); + t2h = HBITS(d1); + ql = LBITS(q); + qh = HBITS(q); + mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */ + } +# endif + + for (;;) { + if ((t2h < rem) || ((t2h == rem) && (t2l <= n2))) + break; + q--; + rem += d0; + if (rem < d0) + break; /* don't let rem overflow */ + if (t2l < d1) + t2h--; + t2l -= d1; + } +# endif /* !BN_LLONG */ + } +# endif /* !BN_DIV3W */ + + l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); + tmp->d[div_n] = l0; + wnum--; + /* + * ignore top values of the bignums just sub the two BN_ULONG arrays + * with bn_sub_words + */ + l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1); + q -= l0; + /* + * Note: As we have considered only the leading two BN_ULONGs in + * the calculation of q, sdiv * q might be greater than wnum (but + * then (q-1) * sdiv is less or equal than wnum) + */ + for (l0 = 0 - l0, j = 0; j < div_n; j++) + tmp->d[j] = sdiv->d[j] & l0; + l0 = bn_add_words(wnum, wnum, tmp->d, div_n); + (*wnumtop) += l0; + assert((*wnumtop) == 0); + + /* store part of the result */ + *--resp = q; + } + /* snum holds remainder, it's as wide as divisor */ + snum->neg = num_neg; + snum->top = div_n; + snum->flags |= BN_FLG_FIXED_TOP; + + if (rm != NULL && bn_rshift_fixed_top(rm, snum, norm_shift) == 0) + goto err; + + BN_CTX_end(ctx); + return 1; + err: + bn_check_top(rm); + BN_CTX_end(ctx); + return 0; +} + +void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG 
*b, int nb) +{ + BN_ULONG *rr; + + if (na < nb) { + int itmp; + BN_ULONG *ltmp; + + itmp = na; + na = nb; + nb = itmp; + ltmp = a; + a = b; + b = ltmp; + + } + rr = &(r[na]); + if (nb <= 0) { + (void)bn_mul_words(r, a, na, 0); + return; + } else + rr[0] = bn_mul_words(r, a, na, b[0]); + + for (;;) { + if (--nb <= 0) + return; + rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); + if (--nb <= 0) + return; + rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); + if (--nb <= 0) + return; + rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); + if (--nb <= 0) + return; + rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); + rr += 4; + r += 4; + b += 4; + } +} + +int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) +{ + int ret = 0; + int top, al, bl; + BIGNUM *rr; +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) + int i; +#endif +#ifdef BN_RECURSION + BIGNUM *t = NULL; + int j = 0, k; +#endif + + bn_check_top(a); + bn_check_top(b); + bn_check_top(r); + + al = a->top; + bl = b->top; + + if ((al == 0) || (bl == 0)) { + BN_zero(r); + return 1; + } + top = al + bl; + + BN_CTX_start(ctx); + if ((r == a) || (r == b)) { + if ((rr = BN_CTX_get(ctx)) == NULL) + goto err; + } else + rr = r; + +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) + i = al - bl; +#endif +#ifdef BN_MUL_COMBA + if (i == 0) { +# if 0 + if (al == 4) { + if (bn_wexpand(rr, 8) == NULL) + goto err; + rr->top = 8; + bn_mul_comba4(rr->d, a->d, b->d); + goto end; + } +# endif + if (al == 8) { + if (bn_wexpand(rr, 16) == NULL) + goto err; + rr->top = 16; + bn_mul_comba8(rr->d, a->d, b->d); + goto end; + } + } +#endif /* BN_MUL_COMBA */ +#ifdef BN_RECURSION + if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { + if (i >= -1 && i <= 1) { + /* + * Find out the power of two lower or equal to the longest of the + * two numbers + */ + if (i >= 0) { + j = BN_num_bits_word((BN_ULONG)al); + } + if (i == -1) { + j = BN_num_bits_word((BN_ULONG)bl); + } + j = 1 << (j - 1); + assert(j <= al || j 
<= bl); + k = j + j; + t = BN_CTX_get(ctx); + if (t == NULL) + goto err; + if (al > j || bl > j) { + if (bn_wexpand(t, k * 4) == NULL) + goto err; + if (bn_wexpand(rr, k * 4) == NULL) + goto err; + bn_mul_part_recursive(rr->d, a->d, b->d, + j, al - j, bl - j, t->d); + } else { /* al <= j || bl <= j */ + + if (bn_wexpand(t, k * 2) == NULL) + goto err; + if (bn_wexpand(rr, k * 2) == NULL) + goto err; + bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d); + } + rr->top = top; + goto end; + } + } +#endif /* BN_RECURSION */ + if (bn_wexpand(rr, top) == NULL) + goto err; + rr->top = top; + bn_mul_normal(rr->d, a->d, al, b->d, bl); + +#if defined(BN_MUL_COMBA) || defined(BN_RECURSION) + end: +#endif + rr->neg = a->neg ^ b->neg; + rr->flags |= BN_FLG_FIXED_TOP; + if (r != rr && BN_copy(r, rr) == NULL) + goto err; + + ret = 1; + err: + bn_check_top(r); + BN_CTX_end(ctx); + return ret; +} + +void bn_correct_top(BIGNUM *a) +{ + BN_ULONG *ftl; + int tmp_top = a->top; + + if (tmp_top > 0) { + for (ftl = &(a->d[tmp_top]); tmp_top > 0; tmp_top--) { + ftl--; + if (*ftl != 0) + break; + } + a->top = tmp_top; + } + if (a->top == 0) + a->neg = 0; + a->flags &= ~BN_FLG_FIXED_TOP; + bn_pollute(a); +} + +int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, + BN_CTX *ctx) +{ + int ret; + + if (BN_is_zero(divisor)) { + // ERR_raise(ERR_LIB_BN, BN_R_DIV_BY_ZERO); + // return 0; + assert(0); + } + + /* + * Invalid zero-padding would have particularly bad consequences so don't + * just rely on bn_check_top() here (bn_check_top() works only for + * BN_DEBUG builds) + */ + if (divisor->d[divisor->top - 1] == 0) { + // ERR_raise(ERR_LIB_BN, BN_R_NOT_INITIALIZED); + //return 0; + assert(0); + } + + ret = bn_div_fixed_top(dv, rm, num, divisor, ctx); + + if (ret) { + if (dv != NULL) + bn_correct_top(dv); + if (rm != NULL) + bn_correct_top(rm); + } + + return ret; +} + + + + + +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) +{ + assert(n >= 0); + if (n <= 
0) + return; + +# ifndef OPENSSL_SMALL_FOOTPRINT + while (n & ~3) { + sqr64(r[0], r[1], a[0]); + sqr64(r[2], r[3], a[1]); + sqr64(r[4], r[5], a[2]); + sqr64(r[6], r[7], a[3]); + a += 4; + r += 8; + n -= 4; + } +# endif + while (n) { + sqr64(r[0], r[1], a[0]); + a++; + r += 2; + n--; + } +} + + +/* tmp must have 2*n words */ +void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) +{ + int i, j, max; + const BN_ULONG *ap; + BN_ULONG *rp; + + max = n * 2; + ap = a; + rp = r; + rp[0] = rp[max - 1] = 0; + rp++; + j = n; + + if (--j > 0) { + ap++; + rp[j] = bn_mul_words(rp, ap, j, ap[-1]); + rp += 2; + } + + for (i = n - 2; i > 0; i--) { + j--; + ap++; + rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]); + rp += 2; + } + + bn_add_words(r, r, r, max); + + /* There will not be a carry */ + + bn_sqr_words(tmp, a, n); + + bn_add_words(r, r, tmp, max); +} + +int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) +{ + int max, al; + int ret = 0; + BIGNUM *tmp, *rr; + + bn_check_top(a); + + al = a->top; + if (al <= 0) { + r->top = 0; + r->neg = 0; + return 1; + } + + BN_CTX_start(ctx); + rr = (a != r) ? 
r : BN_CTX_get(ctx); + tmp = BN_CTX_get(ctx); + if (rr == NULL || tmp == NULL) + goto err; + + max = 2 * al; /* Non-zero (from above) */ + if (bn_wexpand(rr, max) == NULL) + goto err; + + if (al == 4) { +#ifndef BN_SQR_COMBA + BN_ULONG t[8]; + bn_sqr_normal(rr->d, a->d, 4, t); +#else + bn_sqr_comba4(rr->d, a->d); +#endif + } else if (al == 8) { +#ifndef BN_SQR_COMBA + BN_ULONG t[16]; + bn_sqr_normal(rr->d, a->d, 8, t); +#else + bn_sqr_comba8(rr->d, a->d); +#endif + } else { +#if defined(BN_RECURSION) + if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) { + BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2]; + bn_sqr_normal(rr->d, a->d, al, t); + } else { + int j, k; + + j = BN_num_bits_word((BN_ULONG)al); + j = 1 << (j - 1); + k = j + j; + if (al == j) { + if (bn_wexpand(tmp, k * 2) == NULL) + goto err; + bn_sqr_recursive(rr->d, a->d, al, tmp->d); + } else { + if (bn_wexpand(tmp, max) == NULL) + goto err; + bn_sqr_normal(rr->d, a->d, al, tmp->d); + } + } +#else + if (bn_wexpand(tmp, max) == NULL) + goto err; + bn_sqr_normal(rr->d, a->d, al, tmp->d); +#endif + } + + rr->neg = 0; + rr->top = max; + rr->flags |= BN_FLG_FIXED_TOP; + if (r != rr && BN_copy(r, rr) == NULL) + goto err; + + ret = 1; + err: + bn_check_top(rr); + bn_check_top(tmp); + BN_CTX_end(ctx); + return ret; +} + +int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) +{ + int ret = 0; + + if (n < 0) { + // ERR_raise(ERR_LIB_BN, BN_R_INVALID_SHIFT); + assert(0); + //return 0; + } + + ret = bn_rshift_fixed_top(r, a, n); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) +{ + int ret = bn_mul_fixed_top(r, a, b, ctx); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int BN_mask_bits(BIGNUM *a, int n) +{ + int b, w; + + bn_check_top(a); + if (n < 0) + return 0; + + w = n / BN_BITS2; + b = n % BN_BITS2; + if (w >= a->top) + return 0; + if (b == 0) + a->top = w; + else { + a->top = w + 1; + a->d[w] &= ~(BN_MASK2 << b); + } + 
/*
 * Word-level Montgomery reduction of |r| modulo mont->N, result in |ret|.
 *
 * |r| is used as working storage: it is expanded to 2 * N.top words, its
 * sign and top are modified, and its upper half is zeroed before return.
 * |ret| receives exactly N.top words and is flagged BN_FLG_FIXED_TOP.
 *
 * Returns 1 on success (including the degenerate N.top == 0 case, where
 * only ret->top is set to 0) and 0 if either bn_wexpand() call fails.
 */
static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
{
    BIGNUM *n;
    BN_ULONG *ap, *np, *rp, n0, v, carry;
    int nl, max, i;
    unsigned int rtop;

    n = &(mont->N);
    nl = n->top;
    if (nl == 0) {
        ret->top = 0;
        return 1;
    }

    max = (2 * nl);             /* carry is stored separately */
    if (bn_wexpand(r, max) == NULL)
        return 0;

    r->neg ^= n->neg;
    np = n->d;
    rp = r->d;

    /* clear the top words of T */
    for (rtop = r->top, i = 0; i < max; i++) {
        /*
         * (i - rtop) is computed as unsigned: it wraps (top bit set) when
         * i < rtop, so v is all-ones for words below the old top and zero
         * for words at or above it. The mask is applied without branching.
         */
        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
        rp[i] &= v;
    }

    r->top = max;
    r->flags |= BN_FLG_FIXED_TOP;
    n0 = mont->n0[0];

    /*
     * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
     * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
     * includes |carry| which is stored separately.
     */
    for (carry = 0, i = 0; i < nl; i++, rp++) {
        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
        v = (v + carry + rp[nl]) & BN_MASK2;
        /* Track the carry out of the addition above using comparisons only. */
        carry |= (v != rp[nl]);
        carry &= (v <= rp[nl]);
        rp[nl] = v;
    }

    if (bn_wexpand(ret, nl) == NULL)
        return 0;
    ret->top = nl;
    ret->flags |= BN_FLG_FIXED_TOP;
    ret->neg = r->neg;

    rp = ret->d;

    /*
     * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap|
     * includes |carry| which is stored separately.
     */
    ap = &(r->d[nl]);

    carry -= bn_sub_words(rp, ap, np, nl);
    /*
     * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note
     * |carry| cannot be 1. That would imply the subtraction did not fit in
     * |nl| words, and we know at most one subtraction is needed.
     */
    for (i = 0; i < nl; i++) {
        /* Mask-select: keep ap[i] on underflow, else the subtracted word. */
        rp[i] = (carry & ap[i]) | (~carry & rp[i]);
        /* Wipe the upper half of the working buffer as we go. */
        ap[i] = 0;
    }

    return 1;
}
(!bn_mul_fixed_top(tmp, a, b, ctx)) + goto err; + } + /* reduce from aRR to aR */ +#ifdef MONT_WORD + if (!bn_from_montgomery_word(r, tmp, mont)) + goto err; +#else + if (!BN_from_montgomery(r, tmp, mont, ctx)) + goto err; +#endif + ret = 1; + err: + BN_CTX_end(ctx); + return ret; +} + + +int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, + BN_CTX *ctx) +{ + return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx); +} + +static int BN_STACK_push(BN_STACK *st, unsigned int idx) +{ + if (st->depth == st->size) { + /* Need to expand */ + unsigned int newsize = + st->size ? (st->size * 3 / 2) : BN_CTX_START_FRAMES; + unsigned int *newitems; + + if ((newitems = OPENSSL_malloc(sizeof(*newitems) * newsize)) == NULL) + return 0; + if (st->depth) + memcpy(newitems, st->indexes, sizeof(*newitems) * st->depth); + OPENSSL_free(st->indexes); + st->indexes = newitems; + st->size = newsize; + } + st->indexes[(st->depth)++] = idx; + return 1; +} + +static unsigned int BN_STACK_pop(BN_STACK *st) +{ + return st->indexes[--(st->depth)]; +} + +static void BN_STACK_init(BN_STACK *st) +{ + st->indexes = NULL; + st->depth = st->size = 0; +} + +static void BN_STACK_finish(BN_STACK *st) +{ + OPENSSL_free(st->indexes); + st->indexes = NULL; +} + +static void BN_POOL_init(BN_POOL *p) +{ + p->head = p->current = p->tail = NULL; + p->used = p->size = 0; +} + +typedef void *(*memset_t)(void *, int, size_t); + +static volatile memset_t memset_func = memset; + +void OPENSSL_cleanse(void *ptr, size_t len) +{ + memset_func(ptr, 0, len); +} + + +void BN_clear_free(BIGNUM *a) +{ + if (a == NULL) + return; + if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) + bn_free_d(a, 1); + if (BN_get_flags(a, BN_FLG_MALLOCED)) { + OPENSSL_cleanse(a, sizeof(*a)); + OPENSSL_free(a); + } +} + +static void BN_POOL_finish(BN_POOL *p) +{ + unsigned int loop; + BIGNUM *bn; + + while (p->head) { + for (loop = 0, bn = p->head->vals; loop++ < BN_CTX_POOL_SIZE; bn++) + if (bn->d) + 
BN_clear_free(bn); + p->current = p->head->next; + OPENSSL_free(p->head); + p->head = p->current; + } +} + +void BN_set_flags(BIGNUM *b, int n) +{ + b->flags |= n; +} + +static BIGNUM *BN_POOL_get(BN_POOL *p, int flag) +{ + BIGNUM *bn; + unsigned int loop; + + /* Full; allocate a new pool item and link it in. */ + if (p->used == p->size) { + BN_POOL_ITEM *item; + + if ((item = OPENSSL_malloc(sizeof(*item))) == NULL) + return NULL; + for (loop = 0, bn = item->vals; loop++ < BN_CTX_POOL_SIZE; bn++) { + bn_init(bn); + if ((flag & BN_FLG_SECURE) != 0) + BN_set_flags(bn, BN_FLG_SECURE); + } + item->prev = p->tail; + item->next = NULL; + + if (p->head == NULL) + p->head = p->current = p->tail = item; + else { + p->tail->next = item; + p->tail = item; + p->current = item; + } + p->size += BN_CTX_POOL_SIZE; + p->used++; + /* Return the first bignum from the new pool */ + return item->vals; + } + + if (!p->used) + p->current = p->head; + else if ((p->used % BN_CTX_POOL_SIZE) == 0) + p->current = p->current->next; + return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE); +} + +static void BN_POOL_release(BN_POOL *p, unsigned int num) +{ + unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE; + + p->used -= num; + while (num--) { + bn_check_top(p->current->vals + offset); + if (offset == 0) { + offset = BN_CTX_POOL_SIZE - 1; + p->current = p->current->prev; + } else + offset--; + } +} + +const BIGNUM *BN_value_one(void) +{ + static const BN_ULONG data_one = 1L; + static const BIGNUM const_one = + { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA }; + + return &const_one; +} + +int BN_is_bit_set(const BIGNUM *a, int n) +{ + int i, j; + + bn_check_top(a); + if (n < 0) + return 0; + i = n / BN_BITS2; + j = n % BN_BITS2; + if (a->top <= i) + return 0; + return (int)(((a->d[i]) >> j) & ((BN_ULONG)1)); +} + +int BN_set_bit(BIGNUM *a, int n) +{ + int i, j, k; + + if (n < 0) + return 0; + + i = n / BN_BITS2; + j = n % BN_BITS2; + if (a->top <= i) { + if (bn_wexpand(a, i + 1) 
== NULL) + return 0; + for (k = a->top; k < i + 1; k++) + a->d[k] = 0; + a->top = i + 1; + a->flags &= ~BN_FLG_FIXED_TOP; + } + + a->d[i] |= (((BN_ULONG)1) << j); + bn_check_top(a); + return 1; +} + +BN_CTX *BN_CTX_new_ex(OSSL_LIB_CTX *ctx) +{ + BN_CTX *ret; + + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) + return NULL; + /* Initialise the structure */ + BN_POOL_init(&ret->pool); + BN_STACK_init(&ret->stack); + ret->libctx = ctx; + return ret; +} + +BIGNUM *BN_new(void) +{ + BIGNUM *ret; + + if ((ret = OPENSSL_zalloc(sizeof(*ret))) == NULL) + return NULL; + ret->flags = BN_FLG_MALLOCED; + bn_check_top(ret); + return ret; +} + +int BN_is_odd(const BIGNUM *a) +{ + return (a->top > 0) && (a->d[0] & 1); +} + +int BN_lshift1(BIGNUM *r, const BIGNUM *a) +{ + register BN_ULONG *ap, *rp, t, c; + int i; + + bn_check_top(r); + bn_check_top(a); + + if (r != a) { + r->neg = a->neg; + if (bn_wexpand(r, a->top + 1) == NULL) + return 0; + r->top = a->top; + } else { + if (bn_wexpand(r, a->top + 1) == NULL) + return 0; + } + ap = a->d; + rp = r->d; + c = 0; + for (i = 0; i < a->top; i++) { + t = *(ap++); + *(rp++) = ((t << 1) | c) & BN_MASK2; + c = t >> (BN_BITS2 - 1); + } + *rp = c; + r->top += c; + bn_check_top(r); + return 1; +} + +int BN_rshift1(BIGNUM *r, const BIGNUM *a) +{ + BN_ULONG *ap, *rp, t, c; + int i; + + bn_check_top(r); + bn_check_top(a); + + if (BN_is_zero(a)) { + BN_zero(r); + return 1; + } + i = a->top; + ap = a->d; + if (a != r) { + if (bn_wexpand(r, i) == NULL) + return 0; + r->neg = a->neg; + } + rp = r->d; + r->top = i; + t = ap[--i]; + rp[i] = t >> 1; + c = t << (BN_BITS2 - 1); + r->top -= (t == 1); + while (i > 0) { + t = ap[--i]; + rp[i] = ((t >> 1) & BN_MASK2) | c; + c = t << (BN_BITS2 - 1); + } + if (!r->top) + r->neg = 0; /* don't allow negative zero */ + bn_check_top(r); + return 1; +} + +int BN_is_word(const BIGNUM *a, const BN_ULONG w) +{ + return BN_abs_is_word(a, w) && (!w || !a->neg); +} + +int BN_is_one(const BIGNUM *a) +{ + return 
BN_abs_is_word(a, 1) && !a->neg; +} + +int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) +{ + int ret; + + if (n < 0) { + // ERR_raise(ERR_LIB_BN, BN_R_INVALID_SHIFT); + //return 0; + assert(0); + } + + ret = bn_lshift_fixed_top(r, a, n); + + bn_correct_top(r); + bn_check_top(r); + + return ret; +} + +int BN_mul_word(BIGNUM *a, BN_ULONG w) +{ + BN_ULONG ll; + + bn_check_top(a); + w &= BN_MASK2; + if (a->top) { + if (w == 0) + BN_zero(a); + else { + ll = bn_mul_words(a->d, a->d, a->top, w); + if (ll) { + if (bn_wexpand(a, a->top + 1) == NULL) + return 0; + a->d[a->top++] = ll; + } + } + } + bn_check_top(a); + return 1; +} + +/* + * This is an internal function, we assume all callers pass valid arguments: + * all pointers passed here are assumed non-NULL. + */ +BIGNUM *int_bn_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, + int *pnoinv) +{ + BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL; + BIGNUM *ret = NULL; + int sign; + + /* This is invalid input so we don't worry about constant time here */ + if (BN_abs_is_word(n, 1) || BN_is_zero(n)) { + *pnoinv = 1; + return NULL; + } + + *pnoinv = 0; +/* + if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) + || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) { + return bn_mod_inverse_no_branch(in, a, n, ctx, pnoinv); + } +*/ + bn_check_top(a); + bn_check_top(n); + + BN_CTX_start(ctx); + A = BN_CTX_get(ctx); + B = BN_CTX_get(ctx); + X = BN_CTX_get(ctx); + D = BN_CTX_get(ctx); + M = BN_CTX_get(ctx); + Y = BN_CTX_get(ctx); + T = BN_CTX_get(ctx); + if (T == NULL) + goto err; + + if (in == NULL) + R = BN_new(); + else + R = in; + if (R == NULL) + goto err; + + if (!BN_one(X)) + goto err; + BN_zero(Y); + if (BN_copy(B, a) == NULL) + goto err; + if (BN_copy(A, n) == NULL) + goto err; + A->neg = 0; + if (B->neg || (BN_ucmp(B, A) >= 0)) { + if (!BN_nnmod(B, B, A, ctx)) + goto err; + } + sign = -1; + /*- + * From B = a mod |n|, A = |n| it follows that + * + * 0 <= B < A, + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod 
|n|). + */ + + if (BN_is_odd(n) && (BN_num_bits(n) <= 2048)) { + /* + * Binary inversion algorithm; requires odd modulus. This is faster + * than the general algorithm if the modulus is sufficiently small + * (about 400 .. 500 bits on 32-bit systems, but much more on 64-bit + * systems) + */ + int shift; + + while (!BN_is_zero(B)) { + /*- + * 0 < B < |n|, + * 0 < A <= |n|, + * (1) -sign*X*a == B (mod |n|), + * (2) sign*Y*a == A (mod |n|) + */ + + /* + * Now divide B by the maximum possible power of two in the + * integers, and divide X by the same value mod |n|. When we're + * done, (1) still holds. + */ + shift = 0; + while (!BN_is_bit_set(B, shift)) { /* note that 0 < B */ + shift++; + + if (BN_is_odd(X)) { + if (!BN_uadd(X, X, n)) + goto err; + } + /* + * now X is even, so we can easily divide it by two + */ + if (!BN_rshift1(X, X)) + goto err; + } + if (shift > 0) { + if (!BN_rshift(B, B, shift)) + goto err; + } + + /* + * Same for A and Y. Afterwards, (2) still holds. + */ + shift = 0; + while (!BN_is_bit_set(A, shift)) { /* note that 0 < A */ + shift++; + + if (BN_is_odd(Y)) { + if (!BN_uadd(Y, Y, n)) + goto err; + } + /* now Y is even */ + if (!BN_rshift1(Y, Y)) + goto err; + } + if (shift > 0) { + if (!BN_rshift(A, A, shift)) + goto err; + } + + /*- + * We still have (1) and (2). + * Both A and B are odd. + * The following computations ensure that + * + * 0 <= B < |n|, + * 0 < A < |n|, + * (1) -sign*X*a == B (mod |n|), + * (2) sign*Y*a == A (mod |n|), + * + * and that either A or B is even in the next iteration. 
+ */ + if (BN_ucmp(B, A) >= 0) { + /* -sign*(X + Y)*a == B - A (mod |n|) */ + if (!BN_uadd(X, X, Y)) + goto err; + /* + * NB: we could use BN_mod_add_quick(X, X, Y, n), but that + * actually makes the algorithm slower + */ + if (!BN_usub(B, B, A)) + goto err; + } else { + /* sign*(X + Y)*a == A - B (mod |n|) */ + if (!BN_uadd(Y, Y, X)) + goto err; + /* + * as above, BN_mod_add_quick(Y, Y, X, n) would slow things down + */ + if (!BN_usub(A, A, B)) + goto err; + } + } + } else { + /* general inversion algorithm */ + + while (!BN_is_zero(B)) { + BIGNUM *tmp; + + /*- + * 0 < B < A, + * (*) -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|) + */ + + /* (D, M) := (A/B, A%B) ... */ + if (BN_num_bits(A) == BN_num_bits(B)) { + if (!BN_one(D)) + goto err; + if (!BN_sub(M, A, B)) + goto err; + } else if (BN_num_bits(A) == BN_num_bits(B) + 1) { + /* A/B is 1, 2, or 3 */ + if (!BN_lshift1(T, B)) + goto err; + if (BN_ucmp(A, T) < 0) { + /* A < 2*B, so D=1 */ + if (!BN_one(D)) + goto err; + if (!BN_sub(M, A, B)) + goto err; + } else { + /* A >= 2*B, so D=2 or D=3 */ + if (!BN_sub(M, A, T)) + goto err; + if (!BN_add(D, T, B)) + goto err; /* use D (:= 3*B) as temp */ + if (BN_ucmp(A, D) < 0) { + /* A < 3*B, so D=2 */ + if (!BN_set_word(D, 2)) + goto err; + /* + * M (= A - 2*B) already has the correct value + */ + } else { + /* only D=3 remains */ + if (!BN_set_word(D, 3)) + goto err; + /* + * currently M = A - 2*B, but we need M = A - 3*B + */ + if (!BN_sub(M, M, B)) + goto err; + } + } + } else { + if (!BN_div(D, M, A, B, ctx)) + goto err; + } + + /*- + * Now + * A = D*B + M; + * thus we have + * (**) sign*Y*a == D*B + M (mod |n|). + */ + + tmp = A; /* keep the BIGNUM object, the value does not matter */ + + /* (A, B) := (B, A mod B) ... */ + A = B; + B = M; + /* ... so we have 0 <= B < A again */ + + /*- + * Since the former M is now B and the former B is now A, + * (**) translates into + * sign*Y*a == D*A + B (mod |n|), + * i.e. + * sign*Y*a - D*A == B (mod |n|). 
+ * Similarly, (*) translates into + * -sign*X*a == A (mod |n|). + * + * Thus, + * sign*Y*a + D*sign*X*a == B (mod |n|), + * i.e. + * sign*(Y + D*X)*a == B (mod |n|). + * + * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|). + * Note that X and Y stay non-negative all the time. + */ + + /* + * most of the time D is very small, so we can optimize tmp := D*X+Y + */ + if (BN_is_one(D)) { + if (!BN_add(tmp, X, Y)) + goto err; + } else { + if (BN_is_word(D, 2)) { + if (!BN_lshift1(tmp, X)) + goto err; + } else if (BN_is_word(D, 4)) { + if (!BN_lshift(tmp, X, 2)) + goto err; + } else if (D->top == 1) { + if (!BN_copy(tmp, X)) + goto err; + if (!BN_mul_word(tmp, D->d[0])) + goto err; + } else { + if (!BN_mul(tmp, D, X, ctx)) + goto err; + } + if (!BN_add(tmp, tmp, Y)) + goto err; + } + + M = Y; /* keep the BIGNUM object, the value does not matter */ + Y = X; + X = tmp; + sign = -sign; + } + } + + /*- + * The while loop (Euclid's algorithm) ends when + * A == gcd(a,n); + * we have + * sign*Y*a == A (mod |n|), + * where Y is non-negative. + */ + + if (sign < 0) { + if (!BN_sub(Y, n, Y)) + goto err; + } + /* Now Y*a == A (mod |n|). 
*/ + + if (BN_is_one(A)) { + /* Y*a == 1 (mod |n|) */ + if (!Y->neg && BN_ucmp(Y, n) < 0) { + if (!BN_copy(R, Y)) + goto err; + } else { + if (!BN_nnmod(R, Y, n, ctx)) + goto err; + } + } else { + *pnoinv = 1; + goto err; + } + ret = R; + err: + if ((ret == NULL) && (in == NULL)) + // BN_free(R); + return 0; + BN_CTX_end(ctx); + bn_check_top(ret); + return ret; +} + +void BN_set_negative(BIGNUM *a, int b) +{ + if (b && !BN_is_zero(a)) + a->neg = 1; + else + a->neg = 0; +} + + +BIGNUM *BN_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) +{ + BN_CTX *new_ctx = NULL; + BIGNUM *rv; + int noinv = 0; + + if (ctx == NULL) { + ctx = new_ctx = BN_CTX_new_ex(NULL); + if (ctx == NULL) { + //ERR_raise(ERR_LIB_BN, ERR_R_BN_LIB); + assert(0); + return NULL; + } + } + + rv = int_bn_mod_inverse(in, a, n, ctx, &noinv); + if (noinv) + // ERR_raise(ERR_LIB_BN, BN_R_NO_INVERSE); + assert(0); + return 0; + // BN_CTX_free(new_ctx); + return rv; +} + + + +int BN_sub_word(BIGNUM *a, BN_ULONG w) +{ + int i; + + bn_check_top(a); + w &= BN_MASK2; + + /* degenerate case: w is zero */ + if (!w) + return 1; + /* degenerate case: a is zero */ + if (BN_is_zero(a)) { + i = BN_set_word(a, w); + if (i != 0) + BN_set_negative(a, 1); + return i; + } + /* handle 'a' when negative */ + if (a->neg) { + a->neg = 0; + i = BN_add_word(a, w); + a->neg = 1; + return i; + } + + if ((a->top == 1) && (a->d[0] < w)) { + a->d[0] = w - a->d[0]; + a->neg = 1; + return 1; + } + i = 0; + for (;;) { + if (a->d[i] >= w) { + a->d[i] -= w; + break; + } else { + a->d[i] = (a->d[i] - w) & BN_MASK2; + i++; + w = 1; + } + } + if ((a->d[i] == 0) && (i == (a->top - 1))) + a->top--; + bn_check_top(a); + return 1; +} + +int BN_add_word(BIGNUM *a, BN_ULONG w) +{ + BN_ULONG l; + int i; + + bn_check_top(a); + w &= BN_MASK2; + + /* degenerate case: w is zero */ + if (!w) + return 1; + /* degenerate case: a is zero */ + if (BN_is_zero(a)) + return BN_set_word(a, w); + /* handle 'a' when negative */ + if 
(a->neg) { + a->neg = 0; + i = BN_sub_word(a, w); + if (!BN_is_zero(a)) + a->neg = !(a->neg); + return i; + } + for (i = 0; w != 0 && i < a->top; i++) { + a->d[i] = l = (a->d[i] + w) & BN_MASK2; + w = (w > l) ? 1 : 0; + } + if (w && i == a->top) { + if (bn_wexpand(a, a->top + 1) == NULL) + return 0; + a->top++; + a->d[i] = w; + } + bn_check_top(a); + return 1; +} + + + +int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) +{ + int i, ret = 0; + BIGNUM *Ri, *R; + + if (BN_is_zero(mod)) + return 0; + + BN_CTX_start(ctx); + if ((Ri = BN_CTX_get(ctx)) == NULL) + goto err; + R = &(mont->RR); /* grab RR as a temp */ + if (!BN_copy(&(mont->N), mod)) + goto err; /* Set N */ + if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) + BN_set_flags(&(mont->N), BN_FLG_CONSTTIME); + mont->N.neg = 0; + +#ifdef MONT_WORD + { + BIGNUM tmod; + BN_ULONG buf[2]; + + bn_init(&tmod); + tmod.d = buf; + tmod.dmax = 2; + tmod.neg = 0; + + if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) + BN_set_flags(&tmod, BN_FLG_CONSTTIME); + + mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; + +# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) + /* + * Only certain BN_BITS2<=32 platforms actually make use of n0[1], + * and we could use the #else case (with a shorter R value) for the + * others. However, currently only the assembler files do know which + * is which. + */ + + BN_zero(R); + if (!(BN_set_bit(R, 2 * BN_BITS2))) + goto err; + + tmod.top = 0; + if ((buf[0] = mod->d[0])) + tmod.top = 1; + if ((buf[1] = mod->top > 1 ? 
mod->d[1] : 0)) + tmod.top = 2; + + if (BN_is_one(&tmod)) + BN_zero(Ri); + else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) + goto err; + if (!BN_lshift(Ri, Ri, 2 * BN_BITS2)) + goto err; /* R*Ri */ + if (!BN_is_zero(Ri)) { + if (!BN_sub_word(Ri, 1)) + goto err; + } else { /* if N mod word size == 1 */ + + if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL) + goto err; + /* Ri-- (mod double word size) */ + Ri->neg = 0; + Ri->d[0] = BN_MASK2; + Ri->d[1] = BN_MASK2; + Ri->top = 2; + } + if (!BN_div(Ri, NULL, Ri, &tmod, ctx)) + goto err; + /* + * Ni = (R*Ri-1)/N, keep only couple of least significant words: + */ + mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; + mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; +# else + BN_zero(R); + if (!(BN_set_bit(R, BN_BITS2))) + goto err; /* R */ + + buf[0] = mod->d[0]; /* tmod = N mod word size */ + buf[1] = 0; + tmod.top = buf[0] != 0 ? 1 : 0; + /* Ri = R^-1 mod N */ + if (BN_is_one(&tmod)) + BN_zero(Ri); + else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) + goto err; + if (!BN_lshift(Ri, Ri, BN_BITS2)) + goto err; /* R*Ri */ + if (!BN_is_zero(Ri)) { + if (!BN_sub_word(Ri, 1)) + goto err; + } else { /* if N mod word size == 1 */ + + if (!BN_set_word(Ri, BN_MASK2)) + goto err; /* Ri-- (mod word size) */ + } + if (!BN_div(Ri, NULL, Ri, &tmod, ctx)) + goto err; + /* + * Ni = (R*Ri-1)/N, keep only least significant word: + */ + mont->n0[0] = (Ri->top > 0) ? 
Ri->d[0] : 0; + mont->n0[1] = 0; +# endif + } +#else /* !MONT_WORD */ + { /* bignum version */ + mont->ri = BN_num_bits(&mont->N); + BN_zero(R); + if (!BN_set_bit(R, mont->ri)) + goto err; /* R = 2^ri */ + /* Ri = R^-1 mod N */ + if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL) + goto err; + if (!BN_lshift(Ri, Ri, mont->ri)) + goto err; /* R*Ri */ + if (!BN_sub_word(Ri, 1)) + goto err; + /* + * Ni = (R*Ri-1) / N + */ + if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx)) + goto err; + } +#endif + + /* setup RR for conversions */ + BN_zero(&(mont->RR)); + if (!BN_set_bit(&(mont->RR), mont->ri * 2)) + goto err; + if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx)) + goto err; + + for (i = mont->RR.top, ret = mont->N.top; i < ret; i++) + mont->RR.d[i] = 0; + mont->RR.top = ret; + mont->RR.flags |= BN_FLG_FIXED_TOP; + + ret = 1; + err: + BN_CTX_end(ctx); + return ret; +} + + + +// FINALLYYYYYYY + + +int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, + const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) +{ + int i, j, bits, ret = 0, wstart, wend, window; + int start = 1; + BIGNUM *d, *r; + const BIGNUM *aa; + /* Table of variables obtained from 'ctx' */ + BIGNUM *val[TABLE_SIZE]; + BN_MONT_CTX *mont = NULL; + + bn_check_top(a); + bn_check_top(p); + bn_check_top(m); + + if (!BN_is_odd(m)) { + // ERR_raise(ERR_LIB_BN, BN_R_CALLED_WITH_EVEN_MODULUS); + assert(0); + return 0; + } + + + /* + if (m->top <= BN_CONSTTIME_SIZE_LIMIT + && (BN_get_flags(p, BN_FLG_CONSTTIME) != 0 + || BN_get_flags(a, BN_FLG_CONSTTIME) != 0 + || BN_get_flags(m, BN_FLG_CONSTTIME) != 0)) { + return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); + } +*/ + bits = BN_num_bits(p); + if (bits == 0) { + /* x**0 mod 1, or x**0 mod -1 is still zero. 
*/ + if (BN_abs_is_word(m, 1)) { + ret = 1; + BN_zero(rr); + } else { + ret = BN_one(rr); + } + return ret; + } + + BN_CTX_start(ctx); + d = BN_CTX_get(ctx); + r = BN_CTX_get(ctx); + val[0] = BN_CTX_get(ctx); + if (val[0] == NULL) + goto err; + + /* + * If this is not done, things will break in the montgomery part + */ + + if (in_mont != NULL) + mont = in_mont; + else { + if ((mont = BN_MONT_CTX_new()) == NULL) + goto err; + if (!BN_MONT_CTX_set(mont, m, ctx)) + goto err; + } + + if (a->neg || BN_ucmp(a, m) >= 0) { + if (!BN_nnmod(val[0], a, m, ctx)) + goto err; + aa = val[0]; + } else + aa = a; + if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx)) + goto err; /* 1 */ + + window = BN_window_bits_for_exponent_size(bits); + if (window > 1) { + if (!bn_mul_mont_fixed_top(d, val[0], val[0], mont, ctx)) + goto err; /* 2 */ + j = 1 << (window - 1); + for (i = 1; i < j; i++) { + if (((val[i] = BN_CTX_get(ctx)) == NULL) || + !bn_mul_mont_fixed_top(val[i], val[i - 1], d, mont, ctx)) + goto err; + } + } + + start = 1; /* This is used to avoid multiplication etc + * when there is only the value '1' in the + * buffer. */ + wstart = bits - 1; /* The top bit of the window */ + wend = 0; /* The bottom bit of the window */ + +#if 1 /* by Shay Gueron's suggestion */ + j = m->top; /* borrow j */ + if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { + if (bn_wexpand(r, j) == NULL) + goto err; + /* 2^(top*BN_BITS2) - m */ + r->d[0] = (0 - m->d[0]) & BN_MASK2; + for (i = 1; i < j; i++) + r->d[i] = (~m->d[i]) & BN_MASK2; + r->top = j; + r->flags |= BN_FLG_FIXED_TOP; + } else +#endif + if (!bn_to_mont_fixed_top(r, BN_value_one(), mont, ctx)) + goto err; + for (;;) { + int wvalue; /* The 'value' of the window */ + + if (BN_is_bit_set(p, wstart) == 0) { + if (!start) { + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) + goto err; + } + if (wstart == 0) + break; + wstart--; + continue; + } + /* + * We now have wstart on a 'set' bit, we now need to work out how bit + * a window to do. 
To do this we need to scan forward until the last + * set bit before the end of the window + */ + wvalue = 1; + wend = 0; + for (i = 1; i < window; i++) { + if (wstart - i < 0) + break; + if (BN_is_bit_set(p, wstart - i)) { + wvalue <<= (i - wend); + wvalue |= 1; + wend = i; + } + } + + /* wend is the size of the current window */ + j = wend + 1; + /* add the 'bytes above' */ + if (!start) + for (i = 0; i < j; i++) { + if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx)) + goto err; + } + + /* wvalue will be an odd number < 2^window */ + if (!bn_mul_mont_fixed_top(r, r, val[wvalue >> 1], mont, ctx)) + goto err; + + /* move the 'window' down further */ + wstart -= wend + 1; + start = 0; + if (wstart < 0) + break; + } + /* + * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery + * removes padding [if any] and makes return value suitable for public + * API consumer. + */ +#if defined(SPARC_T4_MONT) + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { + j = mont->N.top; /* borrow j */ + val[0]->d[0] = 1; /* borrow val[0] */ + for (i = 1; i < j; i++) + val[0]->d[i] = 0; + val[0]->top = j; + if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx)) + goto err; + } else +#endif + if (!BN_from_montgomery(rr, r, mont, ctx)) + goto err; + ret = 1; + err: + if (in_mont == NULL) + // BN_MONT_CTX_free(mont); + return 0; + BN_CTX_end(ctx); + bn_check_top(rr); + return ret; +} + +int ossl_ctype_check(int c, unsigned int mask) +{ + const int max = sizeof(ctype_char_map) / sizeof(*ctype_char_map); + const int a = ossl_toascii(c); + + return a >= 0 && a < max && (ctype_char_map[a] & mask) != 0; +} + +int OPENSSL_hexchar2int(unsigned char c) +{ +#ifdef CHARSET_EBCDIC + c = os_toebcdic[c]; +#endif + + switch (c) { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + case 
'a': case 'A': + return 0x0A; + case 'b': case 'B': + return 0x0B; + case 'c': case 'C': + return 0x0C; + case 'd': case 'D': + return 0x0D; + case 'e': case 'E': + return 0x0E; + case 'f': case 'F': + return 0x0F; + } + return -1; +} + + +int BN_hex2bn(BIGNUM **bn, const char *a) +{ + BIGNUM *ret = NULL; + BN_ULONG l = 0; + int neg = 0, h, m, i, j, k, c; + int num; + + if (a == NULL || *a == '\0') + return 0; + + if (*a == '-') { + neg = 1; + a++; + } + + for (i = 0; i <= INT_MAX / 4 && ossl_isxdigit(a[i]); i++) + continue; + + if (i == 0 || i > INT_MAX / 4) + return 0; + + num = i + neg; + if (bn == NULL) + return num; + + /* a is the start of the hex digits, and it is 'i' long */ + if (*bn == NULL) { + if ((ret = BN_new()) == NULL) + return 0; + } else { + ret = *bn; + if (BN_get_flags(ret, BN_FLG_STATIC_DATA)) { + // ERR_raise(ERR_LIB_BN, ERR_R_PASSED_INVALID_ARGUMENT); + assert(0); + return 0; + } + BN_zero(ret); + } + + /* i is the number of hex digits */ + if (bn_expand(ret, i * 4) == NULL) + goto err; + + j = i; /* least significant 'hex' */ + m = 0; + h = 0; + while (j > 0) { + m = (BN_BYTES * 2 <= j) ? BN_BYTES * 2 : j; + l = 0; + for (;;) { + c = a[j - m]; + k = OPENSSL_hexchar2int(c); + if (k < 0) + k = 0; /* paranoia */ + l = (l << 4) | k; + + if (--m <= 0) { + ret->d[h++] = l; + break; + } + } + j -= BN_BYTES * 2; + } + ret->top = h; + bn_correct_top(ret); + + *bn = ret; + bn_check_top(ret); + /* Don't set the negative flag if it's zero. */ + if (ret->top != 0) + ret->neg = neg; + return num; + err: + if (*bn == NULL) + // BN_free(ret); + return 0; + return 0; +} + + + +// USAGE? 
+ +//int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx) + +//BN_mod_exp_mont(r, a, p, m, ctx, NULL); + +void test(char * aa, char *pp, char *mm) { + + OSSL_LIB_CTX ossl; // should be initialised + + BN_CTX ctx = *BN_CTX_new_ex(&ossl); + + BIGNUM *a = BN_new(); + BIGNUM *p = BN_new(); + BIGNUM *m = BN_new(); + + BIGNUM *r = BN_new(); + + BN_hex2bn(&a, aa); + BN_hex2bn(&p, pp); + BN_hex2bn(&m, mm); + + BN_mod_exp_mont(r, a, p, m, &ctx, NULL); + + return; + +} + + diff --git a/openssl/openssl-test.h b/openssl/openssl-test.h @@ -0,0 +1,531 @@ +// +// openssl-test.h +// hello +// +// Created by Cedric Zwahlen on 07.10.2023. +// + +#ifndef openssl_test_h +#define openssl_test_h + +#include <stdio.h> +#include <assert.h> + +/* How many bignums are in each "pool item"; */ +#define BN_CTX_POOL_SIZE 16 +/* The stack frame info is resizing, set a first-time expansion size; */ +#define BN_CTX_START_FRAMES 32 + + + +#define SIXTY_FOUR_BIT // on gpu that would be 32 + +# ifdef SIXTY_FOUR_BIT +# define BN_ULONG unsigned long long +# define BN_BYTES 8 +# endif + +# ifdef THIRTY_TWO_BIT +# define BN_ULONG unsigned int +# define BN_BYTES 4 +# endif + + +# define CRYPTO_EX_INDEX__COUNT 16 + + +typedef struct bignum_st BIGNUM; + +struct bignum_st { + BN_ULONG *d; /* + * Pointer to an array of 'BN_BITS2' bit + * chunks. These chunks are organised in + * a least significant chunk first order. + */ + int top; /* Index of last used d +1. */ + /* The next are internal book keeping for bn_expand. */ + int dmax; /* Size of the d array. 
*/ + int neg; /* one if the number is negative */ + int flags; +}; + + + + + +typedef struct crypto_ex_data_st CRYPTO_EX_DATA; + +typedef struct ex_callback_st EX_CALLBACK; + +typedef void CRYPTO_RWLOCK; + +typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from, + void *from_d, int idx, long argl, void *argp); +/* __owur */ int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp, + CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, + CRYPTO_EX_free *free_func); + + + +typedef struct ossl_lib_ctx_st OSSL_LIB_CTX; + + + + +struct ex_callback_st { + long argl; /* Arbitrary long */ + void *argp; /* Arbitrary void * */ + int priority; /* Priority ordering for freeing */ + CRYPTO_EX_new *new_func; + CRYPTO_EX_free *free_func; + CRYPTO_EX_dup *dup_func; +}; + + +typedef struct ex_callbacks_st { + // STACK_OF(EX_CALLBACK) *meth; + + void * meth; +} EX_CALLBACKS; + + + +typedef struct bignum_pool_item { + /* The bignum values */ + BIGNUM vals[BN_CTX_POOL_SIZE]; + /* Linked-list admin */ + struct bignum_pool_item *prev, *next; +} BN_POOL_ITEM; + + +typedef struct bignum_ctx_stack { + /* Array of indexes into the bignum stack */ + unsigned int *indexes; + /* Number of stack frames, and the size of the allocated array */ + unsigned int depth, size; +} BN_STACK; +static void BN_STACK_init(BN_STACK *); +static void BN_STACK_finish(BN_STACK *); +static int BN_STACK_push(BN_STACK *, unsigned int); +static unsigned int BN_STACK_pop(BN_STACK *); + + + + +typedef struct bignum_pool { + /* Linked-list admin */ + BN_POOL_ITEM *head, *current, *tail; + /* Stack depth and allocation size */ + unsigned used, size; +} BN_POOL; +static void BN_POOL_init(BN_POOL *); +static void BN_POOL_finish(BN_POOL *); +static BIGNUM *BN_POOL_get(BN_POOL *, 
int); +static void BN_POOL_release(BN_POOL *, unsigned int); + + + +typedef struct bignum_ctx BN_CTX; + +struct bignum_ctx { + /* The bignum bundles */ + BN_POOL pool; + /* The "stack frames", if you will */ + BN_STACK stack; + /* The number of bignums currently assigned */ + unsigned int used; + /* Depth of stack overflow */ + int err_stack; + /* Block "gets" until an "end" (compatibility behaviour) */ + int too_many; + /* Flags. */ + int flags; + /* The library context */ + OSSL_LIB_CTX *libctx; +}; + + +# define BN_FLG_FIXED_TOP 0 +# define bn_pollute(a) +# define bn_check_top(a) +# define bn_fix_top(a) bn_correct_top(a) +# define bn_check_size(bn, bits) +# define bn_wcheck_size(bn, words) + + + +#define BN_CONSTTIME_SIZE_LIMIT (INT_MAX / BN_BYTES / 256) + +#define BN_FLG_CONSTTIME 0x04 +# define BN_FLG_SECURE 0x08 + +# define BN_BITS2 (BN_BYTES * 8) +# define BN_BITS (BN_BITS2 * 2) +# define BN_TBIT ((BN_ULONG)1 << (BN_BITS2 - 1)) + +# define BN_MASK2 (0xffffffffffffffffLL) + +# define BN_zero(a) (BN_set_word((a),0)) + +# define ossl_inline inline + + +#define INT_MAX __INT_MAX__ + + +# define BN_BITS2 (BN_BYTES * 8) + + +#define OPENSSL_zalloc malloc +#define OPENSSL_malloc malloc +#define OPENSSL_free free + + + +# define OPENSSL_clear_free(addr, num) \ + CRYPTO_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE) + + + +# define BN_one(a) (BN_set_word((a),1)) + +# define BN_FLG_MALLOCED 0x01 +# define BN_FLG_STATIC_DATA 0x02 + +# define BN_BITS4 16 +# define BN_MASK2l (0xffff) +# define BN_MASK2h1 (0xffff8000L) +# define BN_MASK2h (0xffff0000L) + +# define LBITS(a) ((a)&BN_MASK2l) +# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) +# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) + +# define LLBITS(a) ((a)&BN_MASKl) +# define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl) +# define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2) + +# define mul64(l,h,bl,bh) \ + { \ + BN_ULONG m,m1,lt,ht; \ + \ + lt=l; \ + ht=h; \ + m =(bh)*(lt); \ + lt=(bl)*(lt); \ + m1=(bl)*(ht); \ + ht 
=(bh)*(ht); \ + m=(m+m1)&BN_MASK2; ht += L2HBITS((BN_ULONG)(m < m1)); \ + ht+=HBITS(m); \ + m1=L2HBITS(m); \ + lt=(lt+m1)&BN_MASK2; ht += (lt < m1); \ + (l)=lt; \ + (h)=ht; \ + } + + +# define mul(r,a,bl,bh,c) { \ + BN_ULONG l,h; \ + \ + h= (a); \ + l=LBITS(h); \ + h=HBITS(h); \ + mul64(l,h,(bl),(bh)); \ + \ + /* non-multiply part */ \ + l+=(c); h += ((l&BN_MASK2) < (c)); \ + (c)=h&BN_MASK2; \ + (r)=l&BN_MASK2; \ + } + + +# define mul_add(r,a,bl,bh,c) { \ + BN_ULONG l,h; \ + \ + h= (a); \ + l=LBITS(h); \ + h=HBITS(h); \ + mul64(l,h,(bl),(bh)); \ + \ + /* non-multiply part */ \ + l=(l+(c))&BN_MASK2; h += (l < (c)); \ + (c)=(r); \ + l=(l+(c))&BN_MASK2; h += (l < (c)); \ + (c)=h&BN_MASK2; \ + (r)=l; \ + } + +# define sqr64(lo,ho,in) \ + { \ + BN_ULONG l,h,m; \ + \ + h=(in); \ + l=LBITS(h); \ + h=HBITS(h); \ + m =(l)*(h); \ + l*=l; \ + h*=h; \ + h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \ + m =(m&BN_MASK2l)<<(BN_BITS4+1); \ + l=(l+m)&BN_MASK2; h += (l < m); \ + (lo)=l; \ + (ho)=h; \ + } + + +# define BN_window_bits_for_exponent_size(b) \ + ((b) > 671 ? 6 : \ + (b) > 239 ? 5 : \ + (b) > 79 ? 4 : \ + (b) > 23 ? 
3 : 1) + +int BN_add_word(BIGNUM *a, BN_ULONG w); +int BN_sub_word(BIGNUM *a, BN_ULONG w); + +# define ossl_toascii(c) (c) + +# define ossl_isxdigit(c) (ossl_ctype_check((c), CTYPE_MASK_xdigit)) + +/* maximum precomputation table size for *variable* sliding windows */ +#define TABLE_SIZE 32 + +# define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx)) + +int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, + BN_CTX *ctx); + +# define CTYPE_MASK_lower 0x1 +# define CTYPE_MASK_upper 0x2 +# define CTYPE_MASK_digit 0x4 +# define CTYPE_MASK_space 0x8 +# define CTYPE_MASK_xdigit 0x10 +# define CTYPE_MASK_blank 0x20 +# define CTYPE_MASK_cntrl 0x40 +# define CTYPE_MASK_graph 0x80 +# define CTYPE_MASK_print 0x100 +# define CTYPE_MASK_punct 0x200 +# define CTYPE_MASK_base64 0x400 +# define CTYPE_MASK_asn1print 0x800 + +static const unsigned short ctype_char_map[128] = { + /* 00 nul */ CTYPE_MASK_cntrl, + /* 01 soh */ CTYPE_MASK_cntrl, + /* 02 stx */ CTYPE_MASK_cntrl, + /* 03 etx */ CTYPE_MASK_cntrl, + /* 04 eot */ CTYPE_MASK_cntrl, + /* 05 enq */ CTYPE_MASK_cntrl, + /* 06 ack */ CTYPE_MASK_cntrl, + /* 07 \a */ CTYPE_MASK_cntrl, + /* 08 \b */ CTYPE_MASK_cntrl, + /* 09 \t */ CTYPE_MASK_blank | CTYPE_MASK_cntrl | CTYPE_MASK_space, + /* 0A \n */ CTYPE_MASK_cntrl | CTYPE_MASK_space, + /* 0B \v */ CTYPE_MASK_cntrl | CTYPE_MASK_space, + /* 0C \f */ CTYPE_MASK_cntrl | CTYPE_MASK_space, + /* 0D \r */ CTYPE_MASK_cntrl | CTYPE_MASK_space, + /* 0E so */ CTYPE_MASK_cntrl, + /* 0F si */ CTYPE_MASK_cntrl, + /* 10 dle */ CTYPE_MASK_cntrl, + /* 11 dc1 */ CTYPE_MASK_cntrl, + /* 12 dc2 */ CTYPE_MASK_cntrl, + /* 13 dc3 */ CTYPE_MASK_cntrl, + /* 14 dc4 */ CTYPE_MASK_cntrl, + /* 15 nak */ CTYPE_MASK_cntrl, + /* 16 syn */ CTYPE_MASK_cntrl, + /* 17 etb */ CTYPE_MASK_cntrl, + /* 18 can */ CTYPE_MASK_cntrl, + /* 19 em */ CTYPE_MASK_cntrl, + /* 1A sub */ CTYPE_MASK_cntrl, + /* 1B esc */ CTYPE_MASK_cntrl, + /* 1C fs */ CTYPE_MASK_cntrl, + /* 1D gs */ CTYPE_MASK_cntrl, + /* 1E rs */ 
CTYPE_MASK_cntrl, + /* 1F us */ CTYPE_MASK_cntrl, + /* 20 */ CTYPE_MASK_blank | CTYPE_MASK_print | CTYPE_MASK_space + | CTYPE_MASK_asn1print, + /* 21 ! */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 22 " */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 23 # */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 24 $ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 25 % */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 26 & */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 27 ' */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 28 ( */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 29 ) */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 2A * */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 2B + */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 2C , */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 2D - */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 2E . 
*/ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 2F / */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 30 0 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 31 1 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 32 2 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 33 3 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 34 4 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 35 5 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 36 6 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 37 7 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 38 8 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 39 9 */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 3A : */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 3B ; */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 3C < */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 3D = */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 3E > */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 3F ? 
*/ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct + | CTYPE_MASK_asn1print, + /* 40 @ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 41 A */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 42 B */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 43 C */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 44 D */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 45 E */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 46 F */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 47 G */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 48 H */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 49 I */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4A J */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4B K */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4C L */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4D M */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4E N */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 4F O */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 50 P */ 
CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 51 Q */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 52 R */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 53 S */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 54 T */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 55 U */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 56 V */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 57 W */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 58 X */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 59 Y */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 5A Z */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 5B [ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 5C \ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 5D ] */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 5E ^ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 5F _ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 60 ` */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 61 a */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 62 b */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 63 c */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | 
CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 64 d */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 65 e */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 66 f */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 67 g */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 68 h */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 69 i */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6A j */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6B k */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6C l */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6D m */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6E n */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 6F o */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 70 p */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 71 q */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 72 r */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 73 s */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 74 t */ CTYPE_MASK_graph | CTYPE_MASK_lower | 
CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 75 u */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 76 v */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 77 w */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 78 x */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 79 y */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 7A z */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print + | CTYPE_MASK_base64 | CTYPE_MASK_asn1print, + /* 7B { */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 7C | */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 7D } */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 7E ~ */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct, + /* 7F del */ CTYPE_MASK_cntrl +}; + + + +// maybe I should not use this is not + +#define MONT_WORD +//#define BN_MUL_COMBA + + + +void test(char * aa, char *pp, char *mm); + +#endif /* openssl_test_h */ + diff --git a/openssl/readme.txt b/openssl/readme.txt @@ -0,0 +1,24 @@ +This folder contains parts of code from the openssl project (version 3.2). + +https://github.com/openssl/openssl + +----- + +Use the program + +on Ubuntu, run these three commands: + + cmake -S . -B ./build + + cmake --build ./build --config Debug + + ./build/openssltest + +Libgcrypt is required to run the program. + +----- + +Comments + +Though it does compile, the montgomery multiplication algorithm I have copied from openssl does not run as intended. +I don't quite know why. +\ No newline at end of file diff --git a/other/.DS_Store b/other/.DS_Store Binary files differ. 
diff --git a/other/CMakeLists.txt b/other/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.1) # 3.1 << C_STANDARD 11 + +project(HelloOpenCL LANGUAGES C) + +find_package(OpenCL REQUIRED) + +add_executable(${PROJECT_NAME} Main.c) + +target_link_libraries(${PROJECT_NAME} PRIVATE OpenCL::OpenCL) + +set_target_properties(${PROJECT_NAME} PROPERTIES C_STANDARD 11 + C_STANDARD_REQUIRED ON + C_EXTENSIONS OFF) + +target_compile_definitions(${PROJECT_NAME} PRIVATE CL_TARGET_OPENCL_VERSION=100) +\ No newline at end of file diff --git a/other/Main.c b/other/Main.c @@ -0,0 +1,20 @@ +// C standard includes +#include <stdio.h> + +// OpenCL includes +#include <CL/cl.h> + +int main() +{ + cl_int CL_err = CL_SUCCESS; + cl_uint numPlatforms = 0; + + CL_err = clGetPlatformIDs( 0, NULL, &numPlatforms ); + + if (CL_err == CL_SUCCESS) + printf("%u platform(s) found\n", numPlatforms); + else + printf("clGetPlatformIDs(%i)\n", CL_err); + + return 0; +} diff --git a/other/readme.txt b/other/readme.txt @@ -0,0 +1,31 @@ +a test if linking against OpenCL works + +to compile manually + +gcc -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=100 Main.c -o HelloOpenCL -lOpenCL + + +with cmake... + +cmake -S . -B ./build + +then... + +cmake --build ./build --config Release + +and to run it: + +./build/HelloOpenCL + + +––––– + +If only it was that easy. If you get the -1001 error, and run your system in a VM – bad luck. +It seems difficult for VM's to facilitate two way communication with gpus, which I cannot solve. + +interestingly, at least to test whether it would build and run on systems other than macos, use + +sudo apt-get install libpocl2 + +this allows opencl to run on the cpu. +I believe, if it is run natively, the program may be able to choose which cards to use. +\ No newline at end of file diff --git a/source/.DS_Store b/source/.DS_Store Binary files differ. 
diff --git a/source/big-int-test.c b/source/big-int-test.c @@ -0,0 +1,1080 @@ +// +// bigNum_test.c +// hello +// +// Created by Cedric Zwahlen on 25.09.23. +// + +#include "big-int-test.h" + + + +// MARK: min functionality + +int mpModulo(DIGIT_T r[], const DIGIT_T u[], size_t udigits, + DIGIT_T v[], size_t vdigits) +{ + /* Computes r = u mod v + where r, v are multiprecision integers of length vdigits + and u is a multiprecision integer of length udigits. + r may overlap v. + + Note that r here is only vdigits long, + whereas in mpDivide it is udigits long. + + Use remainder from mpDivide function. + */ + + size_t nn = max(udigits, vdigits); +/* Allocate temp storage */ +//#ifdef NO_ALLOCS + // [v2.6] increased to two times + DIGIT_T qq[MAX_FIXED_DIGITS*2]; + DIGIT_T rr[MAX_FIXED_DIGITS*2]; + // assert(nn <= (MAX_FIXED_DIGITS*2)); +/*#else + DIGIT_T *qq, *rr; + qq = mpAlloc(udigits); + rr = mpAlloc(nn); +#endif +*/ + + /* rr[nn] = u mod v */ + mpDivide(qq, rr, u, udigits, v, vdigits); + + /* Final r is only vdigits long */ + mpSetEqual(r, rr, vdigits); + + mpDESTROY(rr, udigits); + mpDESTROY(qq, udigits); + + return 0; +} + +int mpModMult(DIGIT_T a[], const DIGIT_T x[], const DIGIT_T y[], + DIGIT_T m[], size_t ndigits) +{ /* Computes a = (x * y) mod m */ + +/* Double-length temp variable p */ +// #ifdef NO_ALLOCS + DIGIT_T p[MAX_FIXED_DIGITS * 2]; +// assert(ndigits <= MAX_FIXED_DIGITS); +/*#else + DIGIT_T *p; + p = mpAlloc(ndigits * 2); +#endif +*/ + /* Calc p[2n] = x * y */ + mpMultiply(p, x, y, ndigits); + + /* Then modulo (NOTE: a is OK at only ndigits long) */ + mpModulo(a, p, ndigits * 2, m, ndigits); + + mpDESTROY(p, ndigits * 2); + + return 0; +} + +int mpMultiply(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits) +{ + /* Computes product w = u * v + where u, v are multiprecision integers of ndigits each + and w is a multiprecision integer of 2*ndigits + + Ref: Knuth Vol 2 Ch 4.3.1 p 268 Algorithm M. 
+ */ + + DIGIT_T k, t[2]; + size_t i, j, m, n; + + // assert(w != u && w != v); + + m = n = ndigits; + + /* Step M1. Initialise */ + for (i = 0; i < 2 * m; i++) + w[i] = 0; + + for (j = 0; j < n; j++) + { + /* Step M2. Zero multiplier? */ + if (v[j] == 0) + { + w[j + m] = 0; + } + else + { + /* Step M3. Initialise i */ + k = 0; + for (i = 0; i < m; i++) + { + /* Step M4. Multiply and add */ + /* t = u_i * v_j + w_(i+j) + k */ + spMultiply(t, u[i], v[j]); + + t[0] += k; + if (t[0] < k) + t[1]++; + t[0] += w[i+j]; + if (t[0] < w[i+j]) + t[1]++; + + w[i+j] = t[0]; + k = t[1]; + } + /* Step M5. Loop on i, set w_(j+m) = k */ + w[j+m] = k; + } + } /* Step M6. Loop on j */ + + return 0; +} + +DIGIT_T mpAdd(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits) +{ + /* Calculates w = u + v + where w, u, v are multiprecision integers of ndigits each + Returns carry if overflow. Carry = 0 or 1. + + Ref: Knuth Vol 2 Ch 4.3.1 p 266 Algorithm A. + */ + + DIGIT_T k; + size_t j; + + // assert(w != v); + + /* Step A1. Initialise */ + k = 0; + + for (j = 0; j < ndigits; j++) + { + /* Step A2. Add digits w_j = (u_j + v_j + k) + Set k = 1 if carry (overflow) occurs + */ + w[j] = u[j] + k; + if (w[j] < k) + k = 1; + else + k = 0; + + w[j] += v[j]; + if (w[j] < v[j]) + k++; + + } /* Step A3. Loop on j */ + + return k; /* w_n = k */ +} + +int mpDivide(DIGIT_T q[], DIGIT_T r[], const DIGIT_T u[], + size_t udigits, DIGIT_T v[], size_t vdigits) +{ /* Computes quotient q = u / v and remainder r = u mod v + where q, r, u are multiple precision digits + all of udigits and the divisor v is vdigits. + + Ref: Knuth Vol 2 Ch 4.3.1 p 272 Algorithm D. + + Do without extra storage space, i.e. use r[] for + normalised u[], unnormalise v[] at end, and cope with + extra digit Uj+n added to u after normalisation. + + WARNING: this trashes q and r first, so cannot do + u = u / v or v = u mod v. + It also changes v temporarily so cannot make it const. 
+ */ + size_t shift; + int n, m, j; + DIGIT_T bitmask, overflow; + DIGIT_T qhat, rhat, t[2]; + DIGIT_T *uu, *ww; + int qhatOK, cmp; + + /* Clear q and r */ + mpSetZero(q, udigits); + mpSetZero(r, udigits); + + /* Work out exact sizes of u and v */ + n = (int)mpSizeof(v, vdigits); + m = (int)mpSizeof(u, udigits); + m -= n; + + /* Catch special cases */ + if (n == 0) + return -1; /* Error: divide by zero */ + + if (n == 1) + { /* Use short division instead */ + r[0] = mpShortDiv(q, u, v[0], udigits); + return 0; + } + + if (m < 0) + { /* v > u, so just set q = 0 and r = u */ + mpSetEqual(r, u, udigits); + return 0; + } + + if (m == 0) + { /* u and v are the same length */ + cmp = mpCompare(u, v, (size_t)n); + if (cmp < 0) + { /* v > u, as above */ + mpSetEqual(r, u, udigits); + return 0; + } + else if (cmp == 0) + { /* v == u, so set q = 1 and r = 0 */ + mpSetDigit(q, 1, udigits); + return 0; + } + } + + /* In Knuth notation, we have: + Given + u = (Um+n-1 ... U1U0) + v = (Vn-1 ... V1V0) + Compute + q = u/v = (QmQm-1 ... Q0) + r = u mod v = (Rn-1 ... R1R0) + */ + + /* Step D1. Normalise */ + /* Requires high bit of Vn-1 + to be set, so find most signif. bit then shift left, + i.e. d = 2^shift, u' = u * d, v' = v * d. + */ + bitmask = HIBITMASK; + for (shift = 0; shift < BITS_PER_DIGIT; shift++) + { + if (v[n-1] & bitmask) + break; + bitmask >>= 1; + } + + /* Normalise v in situ - NB only shift non-zero digits */ + overflow = mpShiftLeft(v, v, shift, n); + + /* Copy normalised dividend u*d into r */ + overflow = mpShiftLeft(r, u, shift, n + m); + uu = r; /* Use ptr to keep notation constant */ + + t[0] = overflow; /* Extra digit Um+n */ + + /* Step D2. Initialise j. Set j = m */ + for (j = m; j >= 0; j--) + { + /* Step D3. 
Set Qhat = [(b.Uj+n + Uj+n-1)/Vn-1] + and Rhat = remainder */ + qhatOK = 0; + t[1] = t[0]; /* This is Uj+n */ + t[0] = uu[j+n-1]; + overflow = spDivide(&qhat, &rhat, t, v[n-1]); + + /* Test Qhat */ + if (overflow) + { /* Qhat == b so set Qhat = b - 1 */ + qhat = MAX_DIGIT; + rhat = uu[j+n-1]; + rhat += v[n-1]; + if (rhat < v[n-1]) /* Rhat >= b, so no re-test */ + qhatOK = 1; + } + /* [VERSION 2: Added extra test "qhat && "] */ + if (qhat && !qhatOK && QhatTooBig(qhat, rhat, v[n-2], uu[j+n-2])) + { /* If Qhat.Vn-2 > b.Rhat + Uj+n-2 + decrease Qhat by one, increase Rhat by Vn-1 + */ + qhat--; + rhat += v[n-1]; + /* Repeat this test if Rhat < b */ + if (!(rhat < v[n-1])) + if (QhatTooBig(qhat, rhat, v[n-2], uu[j+n-2])) + qhat--; + } + + + /* Step D4. Multiply and subtract */ + ww = &uu[j]; + overflow = mpMultSub(t[1], ww, v, qhat, (size_t)n); + + /* Step D5. Test remainder. Set Qj = Qhat */ + q[j] = qhat; + if (overflow) + { /* Step D6. Add back if D4 was negative */ + q[j]--; + overflow = mpAdd(ww, ww, v, (size_t)n); + } + + t[0] = uu[j+n-1]; /* Uj+n on next round */ + + } /* Step D7. Loop on j */ + + /* Clear high digits in uu */ + for (j = n; j < m+n; j++) + uu[j] = 0; + + /* Step D8. Unnormalise. */ + + mpShiftRight(r, r, shift, n); + mpShiftRight(v, v, shift, n); + + return 0; +} + +void mpSetDigit(DIGIT_T a[], DIGIT_T d, size_t ndigits) +{ /* Sets a = d where d is a single digit */ + size_t i; + + for (i = 1; i < ndigits; i++) + { + a[i] = 0; + } + a[0] = d; +} + +DIGIT_T mpShortDiv(DIGIT_T q[], const DIGIT_T u[], DIGIT_T v, + size_t ndigits) +{ + /* Calculates quotient q = u div v + Returns remainder r = u mod v + where q, u are multiprecision integers of ndigits each + and r, v are single precision digits. + + Makes no assumptions about normalisation. 
+ + Ref: Knuth Vol 2 Ch 4.3.1 Exercise 16 p625 + */ + size_t j; + DIGIT_T t[2], r; + size_t shift; + DIGIT_T bitmask, overflow, *uu; + + if (ndigits == 0) return 0; + if (v == 0) return 0; /* Divide by zero error */ + + /* Normalise first */ + /* Requires high bit of V + to be set, so find most signif. bit then shift left, + i.e. d = 2^shift, u' = u * d, v' = v * d. + */ + bitmask = HIBITMASK; + for (shift = 0; shift < BITS_PER_DIGIT; shift++) + { + if (v & bitmask) + break; + bitmask >>= 1; + } + + v <<= shift; + overflow = mpShiftLeft(q, u, shift, ndigits); + uu = q; + + /* Step S1 - modified for extra digit. */ + r = overflow; /* New digit Un */ + j = ndigits; + while (j--) + { + /* Step S2. */ + t[1] = r; + t[0] = uu[j]; + overflow = spDivide(&q[j], &r, t, v); + } + + /* Unnormalise */ + r >>= shift; + + return r; +} + +static int QhatTooBig(DIGIT_T qhat, DIGIT_T rhat, + DIGIT_T vn2, DIGIT_T ujn2) +{ /* Returns true if Qhat is too big + i.e. if (Qhat * Vn-2) > (b.Rhat + Uj+n-2) + */ + DIGIT_T t[2]; + + spMultiply(t, qhat, vn2); + if (t[1] < rhat) + return 0; + else if (t[1] > rhat) + return 1; + else if (t[0] > ujn2) + return 1; + + return 0; +} + +static DIGIT_T mpMultSub(DIGIT_T wn, DIGIT_T w[], const DIGIT_T v[], + DIGIT_T q, size_t n) +{ /* Compute w = w - qv + where w = (WnW[n-1]...W[0]) + return modified Wn. + */ + DIGIT_T k, t[2]; + size_t i; + + if (q == 0) /* No change */ + return wn; + + k = 0; + + for (i = 0; i < n; i++) + { + spMultiply(t, q, v[i]); + w[i] -= k; + if (w[i] > MAX_DIGIT - k) + k = 1; + else + k = 0; + w[i] -= t[0]; + if (w[i] > MAX_DIGIT - t[0]) + k++; + k += t[1]; + } + + /* Cope with Wn not stored in array w[0..n-1] */ + wn -= k; + + return wn; +} + +DIGIT_T mpShiftLeft(DIGIT_T a[], const DIGIT_T *b, + size_t shift, size_t ndigits) +{ /* Computes a = b << shift */ + /* [v2.1] Modified to cope with shift > BITS_PERDIGIT */ + size_t i, y, nw, bits; + DIGIT_T mask, carry, nextcarry; + + /* Do we shift whole digits? 
/* Computes a = b >> shift for ndigits-digit numbers (least-significant
   digit first).  a and b may be the same array.

   Returns the BITS_PER_DIGIT bits that sat immediately below the new
   least-significant bit, left-aligned in a digit (0 when nothing was
   shifted out, when shift == 0, or when ndigits == 0).

   Fixes relative to the earlier version:
   - shift == 0 no longer evaluates "x << BITS_PER_DIGIT", which is
     undefined behaviour in C;
   - the carry digit b[nw-1] is read BEFORE a[] is written, so the
     result is correct when a aliases b;
   - b[nw-1] is not read when the shift discards every digit.
*/
DIGIT_T mpShiftRight(DIGIT_T a[], const DIGIT_T b[], size_t shift, size_t ndigits)
{
	size_t i, nw, bits, y;
	DIGIT_T mask, carry, nextcarry;

	if (ndigits == 0)
		return 0;

	/* Do we shift whole digits? */
	if (shift >= BITS_PER_DIGIT)
	{
		nw = shift / BITS_PER_DIGIT;
		bits = shift % BITS_PER_DIGIT;

		/* Digit just below the new a[0]; capture it first (a may be b) */
		carry = (nw <= ndigits) ? (b[nw-1] >> bits) : 0;

		/* Ascending order is safe in place: each read is above its write */
		for (i = 0; i < ndigits; i++)
		{
			if (nw < ndigits && i < ndigits - nw)
				a[i] = b[i+nw];
			else
				a[i] = 0;
		}

		/* Call again to shift the remaining bits inside digits */
		if (bits)
			carry |= mpShiftRight(a, a, bits, ndigits);
		return carry;
	}

	bits = shift;
	if (bits == 0)
	{	/* Plain copy; a shift by a full digit width would be undefined */
		i = ndigits;
		while (i--)
			a[i] = b[i];
		return 0;
	}

	/* Construct mask selecting the low bits that fall out of each digit */
	mask = ~(~(DIGIT_T)0 << bits);

	y = BITS_PER_DIGIT - bits;	/* 1..BITS_PER_DIGIT-1 here */
	carry = 0;
	i = ndigits;
	while (i--)
	{
		nextcarry = (b[i] & mask) << y;
		a[i] = b[i] >> bits | carry;
		carry = nextcarry;
	}

	return carry;
}

/* Computes the 64-bit product p = x * y of two 32-bit values.
   p[0] receives the low 32 bits and p[1] the high 32 bits.
   Always returns 0.
*/
int spMultiply(uint32_t p[2], uint32_t x, uint32_t y)
{
	uint64_t product = (uint64_t)x * (uint64_t)y;

	p[0] = (uint32_t)(product & 0xFFFFFFFF);
	p[1] = (uint32_t)(product >> 32);

	return 0;
}
/* Compares two ndigits-digit numbers, most-significant digit first.
   Returns 1 if a > b, -1 if a < b and 0 if they are equal
   (two zero-length numbers compare equal).
*/
int mpCompare(const DIGIT_T a[], const DIGIT_T b[], size_t ndigits)
{
	size_t i = ndigits;

	while (i > 0)
	{
		i--;
		if (a[i] != b[i])
			return (a[i] > b[i]) ? 1 : -1;
	}

	return 0;	/* EQ */
}

/* Sets a = b, copying ndigits digits from the least-significant end up. */
void mpSetEqual(DIGIT_T a[], const DIGIT_T b[], size_t ndigits)
{
	size_t i;

	for (i = 0; i < ndigits; i++)
		a[i] = b[i];
}

/* Sets a = 0 and reads one digit back through a volatile pointer so the
   optimiser cannot discard the wipe.  Returns the value read back (0).

   A NULL pointer or a zero-length number is left alone and 0 is
   returned; previously a[0] was read even when ndigits == 0.
*/
volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits)
{
	volatile DIGIT_T optdummy;

	if (!a || ndigits == 0)
		return 0;

	while (ndigits--)
		a[ndigits] = 0;

	/* volatile read-back: keeps the stores above observable */
	optdummy = a[0];
	return optdummy;
}

/* Returns the number of significant digits in a, i.e. the index of the
   highest non-zero digit plus one, or 0 if a is zero.
*/
size_t mpSizeof(const DIGIT_T a[], size_t ndigits)
{
	while (ndigits > 0 && a[ndigits-1] == 0)
		ndigits--;

	return ndigits;
}
/* Converts nbytes big-endian octets in c into the multiprecision
   integer a (ndigits digits, least-significant digit first).

   Octets are consumed from the END of c (the least-significant byte)
   towards the start; if the octet string is longer than a can hold,
   the most-significant octets are dropped.  Every digit of a that is
   not filled from c is set to zero.

   Returns the number of digits actually filled from c (this may be
   larger than mpSizeof(a) when the leading octets are zero).

   Byte positions are tracked with size_t throughout, so very large
   nbytes values are no longer truncated through an int.
*/
size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes)
{
	size_t i, nset;
	size_t remaining = nbytes;	/* octets of c not yet consumed */
	size_t k;			/* bit offset inside the current digit */
	DIGIT_T t;

	/* Read in octets, least significant first */
	for (i = 0; i < ndigits && remaining > 0; i++)
	{
		t = 0;
		for (k = 0; remaining > 0 && k < BITS_PER_DIGIT; k += 8)
			t |= ((DIGIT_T)c[--remaining]) << k;
		a[i] = t;
	}
	nset = i;

	/* Clear whatever the octet string did not reach */
	for ( ; i < ndigits; i++)
		a[i] = 0;

	return nset;
}
/* Returns ceil(x) as a non-negative integer, or 0 if x < 0. */
static size_t uiceil(double x)
{
	size_t whole;

	if (x < 0)
		return 0;

	whole = (size_t)x;	/* truncates towards zero */
	if ((x - whole) > 0.0)
		whole++;	/* there was a fractional part: round up */

	return whole;
}

/* Overwrites the n bytes at v with zero, writing through a volatile
   pointer and reading one byte back so the optimiser cannot drop the
   wipe.  Returns the byte read back (0).

   A NULL pointer or n == 0 is left alone and 0 is returned; previously
   the first byte was read even when the buffer was empty.
*/
volatile uint8_t zeroise_bytes(volatile void *v, size_t n)
{
	volatile uint8_t optdummy;
	volatile uint8_t *b = (volatile uint8_t *)v;	/* keep the qualifier */

	if (!b || n == 0)
		return 0;

	while (n--)
		b[n] = 0;

	optdummy = b[0];
	return optdummy;
}

/* Reports a failure: prints msg via perror() and then a note on stdout.
   NOTE(review): despite the message text this function returns to its
   caller; it does not terminate the program.
*/
void mpFail(char *msg)
{
	perror(msg);
	printf("the program should stop here");
}

/* Returns the number of significant bits in d (ndigits digits,
   least-significant digit first), or 0 if d is NULL, empty or zero.
*/
size_t mpBitLength(const DIGIT_T d[], size_t ndigits)
{
	DIGIT_T top;
	size_t bits;

	if (!d)
		return 0;

	/* Skip leading zero digits (same scan as mpSizeof) */
	while (ndigits > 0 && d[ndigits-1] == 0)
		ndigits--;
	if (ndigits == 0)
		return 0;

	/* top is non-zero, so the loop below must reach a set high bit */
	top = d[ndigits-1];
	bits = ndigits * BITS_PER_DIGIT;
	while (!(top & HIBITMASK))
	{
		top <<= 1;
		bits--;
	}

	return bits;
}
+{ + if (prefix) printf("%s", prefix); + /* Trim leading digits which are zero */ + while (len--) + { + if (a[len] != 0) + break; + } + len++; + if (0 == len) len = 1; + /* print first digit without leading zeros */ + printf("%" PRIxBIGD, a[--len]); + while (len--) + { + printf("%08" PRIxBIGD, a[len]); + } + if (suffix) printf("%s", suffix); +} + + +int mpModExpO(DIGIT_T yout[], const DIGIT_T x[], const DIGIT_T e[], DIGIT_T m[], size_t ndigits) +{ /* Computes y = x^e mod m */ + /* "Classic" binary left-to-right method */ + /* [v2.2] removed const restriction on m[] to avoid using an extra alloc'd var + (m is changed in-situ during the divide operation then restored) */ + DIGIT_T mask; + size_t n; + size_t nn = ndigits * 2; + /* Create some double-length temps */ +//#ifdef NO_ALLOCS + DIGIT_T t1[MAX_FIXED_DIGITS * 2]; + DIGIT_T t2[MAX_FIXED_DIGITS * 2]; + DIGIT_T y[MAX_FIXED_DIGITS * 2]; + assert(ndigits <= MAX_FIXED_DIGITS); +/*#else + DIGIT_T *t1, *t2, *y; + t1 = mpAlloc(nn); + t2 = mpAlloc(nn); + y = mpAlloc(nn); +#endif + */ + assert(ndigits != 0); + + n = mpSizeof(e, ndigits); + /* Catch e==0 => x^0=1 */ + if (0 == n) + { + mpSetDigit(yout, 1, ndigits); + goto done; + } + /* Find second-most significant bit in e */ + for (mask = HIBITMASK; mask > 0; mask >>= 1) + { + if (e[n-1] & mask) + break; + } + mpNEXTBITMASK(mask, n); + + /* Set y = x */ + mpSetEqual(y, x, ndigits); + + /* For bit j = k-2 downto 0 */ + while (n) + { + /* Square y = y * y mod n */ + mpMODSQUARETEMP(y, m, ndigits, t1, t2); + if (e[n-1] & mask) + { /* if e(j) == 1 then multiply + y = y * x mod n */ + mpMODMULTTEMP(y, x, m, ndigits, t1, t2); + } + + /* Move to next bit */ + mpNEXTBITMASK(mask, n); + } + + /* Return y */ + mpSetEqual(yout, y, ndigits); + +done: + mpDESTROY(t1, nn); + mpDESTROY(t2, nn); + mpDESTROY(y, ndigits); + + return 0; +} + +int mpSquare(DIGIT_T w[], const DIGIT_T x[], size_t ndigits) +/* New in Version 2.0 */ +{ + /* Computes square w = x * x + where x is a 
multiprecision integer of ndigits + and w is a multiprecision integer of 2*ndigits + + Ref: Menezes p596 Algorithm 14.16 with errata. + */ + + DIGIT_T k, p[2], u[2], cbit, carry; + size_t i, j, t, i2, cpos; + + assert(w != x); + + t = ndigits; + + /* 1. For i from 0 to (2t-1) do: w_i = 0 */ + i2 = t << 1; + for (i = 0; i < i2; i++) + w[i] = 0; + + carry = 0; + cpos = i2-1; + /* 2. For i from 0 to (t-1) do: */ + for (i = 0; i < t; i++) + { + /* 2.1 (uv) = w_2i + x_i * x_i, w_2i = v, c = u + Careful, w_2i may be double-prec + */ + i2 = i << 1; /* 2*i */ + spMultiply(p, x[i], x[i]); + p[0] += w[i2]; + if (p[0] < w[i2]) + p[1]++; + k = 0; /* p[1] < b, so no overflow here */ + if (i2 == cpos && carry) + { + p[1] += carry; + if (p[1] < carry) + k++; + carry = 0; + } + w[i2] = p[0]; + u[0] = p[1]; + u[1] = k; + + /* 2.2 for j from (i+1) to (t-1) do: + (uv) = w_{i+j} + 2x_j * x_i + c, + w_{i+j} = v, c = u, + u is double-prec + w_{i+j} is dbl if [i+j] == cpos + */ + k = 0; + for (j = i+1; j < t; j++) + { + /* p = x_j * x_i */ + spMultiply(p, x[j], x[i]); + /* p = 2p <=> p <<= 1 */ + cbit = (p[0] & HIBITMASK) != 0; + k = (p[1] & HIBITMASK) != 0; + p[0] <<= 1; + p[1] <<= 1; + p[1] |= cbit; + /* p = p + c */ + p[0] += u[0]; + if (p[0] < u[0]) + { + p[1]++; + if (p[1] == 0) + k++; + } + p[1] += u[1]; + if (p[1] < u[1]) + k++; + /* p = p + w_{i+j} */ + p[0] += w[i+j]; + if (p[0] < w[i+j]) + { + p[1]++; + if (p[1] == 0) + k++; + } + if ((i+j) == cpos && carry) + { /* catch overflow from last round */ + p[1] += carry; + if (p[1] < carry) + k++; + carry = 0; + } + /* w_{i+j} = v, c = u */ + w[i+j] = p[0]; + u[0] = p[1]; + u[1] = k; + } + /* 2.3 w_{i+t} = u */ + w[i+t] = u[0]; + /* remember overflow in w_{i+t} */ + carry = u[1]; + cpos = i+t; + } + + /* (NB original step 3 deleted in Menezes errata) */ + + /* Return w */ + + return 0; +} + + +size_t mpConvToHex(const DIGIT_T a[], size_t ndigits, char *s, size_t smax) +/* Convert big digit a into a string in hexadecimal format, + 
where s has max size smax. + Return number of chars set excluding leading zeroes. +*/ +{ + return conv_to_base(a, ndigits, s, smax, 16); +} + + +static size_t conv_to_base(const DIGIT_T a[], size_t ndigits, char *s, size_t smax, int base) +/* Convert big digit a into a string in given base format, + where s has max size smax. + Return number of chars set excluding leading zeroes. + smax can be 0 to find out the required length. +*/ +{ + + uint8_t bytes[MAX_ALLOC_SIZE], newdigits[MAX_ALLOC_SIZE*3]; // [v2.6] increased + + const char DEC_DIGITS[] = "0123456789"; + const char HEX_DIGITS[] = "0123456789abcdef"; + size_t newlen, nbytes, nchars; + size_t n; + unsigned long t; + size_t i, j, isig; + const char *digits; + double factor; + + switch (base) + { + case 10: + digits = DEC_DIGITS; + factor = 2.40824; /* log(256)/log(10)=2.40824 */ + break; + case 16: + digits = HEX_DIGITS; + factor = 2.0; /* log(256)/log(16)=2.0 */ + break; + default: + assert (10 == base || 16 == base); + return 0; + } + + /* Set up output string with null chars */ + if (smax > 0 && s) + { + memset(s, '0', smax-1); + s[smax-1] = '\0'; + } + + /* Catch zero input value (return 1 not zero) */ + if (mpIsZero(a, ndigits)) + { + if (smax > 0 && s) + s[1] = '\0'; + return 1; + } + + /* First, we convert to 8-bit octets (bytes), which are easier to handle */ + nbytes = ndigits * BITS_PER_DIGIT / 8; + ALLOC_BYTES(bytes, nbytes); + + n = mpConvToOctets(a, ndigits, bytes, nbytes); + + /* Create some temp storage for int values */ + newlen = uiceil(n * factor); + ALLOC_BYTES(newdigits, newlen); + + for (i = 0; i < nbytes; i++) + { + t = bytes[i]; + for (j = newlen; j > 0; j--) + { + t += (unsigned long)newdigits[j-1] * 256; + newdigits[j-1] = (unsigned char)(t % base); + t /= base; + } + } + + /* Find index of leading significant digit */ + for (isig = 0; isig < newlen; isig++) + if (newdigits[isig]) + break; + + nchars = newlen - isig; + + /* Convert to a null-terminated string of decimal chars */ + /* 
up to limit, unless user has specified null or size == 0 */ + if (smax > 0 && s) + { + for (i = 0; i < nchars && i < smax-1; i++) + { + s[i] = digits[newdigits[isig+i]]; + } + s[i] = '\0'; + } + + FREE_BYTES(bytes, nbytes); + FREE_BYTES(newdigits, newlen); + + return nchars; +} + +int mpIsZero(const DIGIT_T a[], size_t ndigits) +{ + size_t i; + + /* if (ndigits == 0) return -1; // deleted [v2.5] */ + + for (i = 0; i < ndigits; i++) /* Start at lsb */ + { + if (a[i] != 0) + return 0; /* False */ + } + + return (!0); /* True */ +} diff --git a/source/big-int-test.h b/source/big-int-test.h @@ -0,0 +1,127 @@ +// +// Created by Cedric Zwahlen on 25.09.23. +// + +#ifndef big_int_test_h +#define big_int_test_h + +#include <stdio.h> + + + + +#include <inttypes.h> + +#include <stdint.h> +#include <stdlib.h> + +#include <string.h> // only used for the convert from hex function +#include <assert.h> + +// MARK: definitions + +typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half? + +typedef uint16_t HALF_DIGIT_T; + +/* Sizes to match */ + + + +// MARK: MACROS + +#define mpDESTROY(b, n) do{if(b)mpSetZero(b,n);}while(0) +#define max(a,b) (((a) > (b)) ? 
(a) : (b)) + +// only for that string conversion +#define ALLOC_BYTES(b,n) do{assert((n)<=sizeof((b)));zeroise_bytes((b),(n));}while(0) +#define FREE_BYTES(b,n) zeroise_bytes((b),(n)) + + +#define MAX_DIGIT 0xFFFFFFFFUL +#define MAX_HALF_DIGIT 0xFFFFUL /* NB 'L' */ +#define BITS_PER_DIGIT 32 +#define HIBITMASK 0x80000000UL + +#define MAX_FIXED_BIT_LENGTH 8192 +#define MAX_FIXED_DIGITS ((MAX_FIXED_BIT_LENGTH + BITS_PER_DIGIT - 1) / BITS_PER_DIGIT) + +#define MAX_ALLOC_SIZE (MAX_FIXED_DIGITS*BYTES_PER_DIGIT) + +#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8) + +#define PRIuBIGD PRIu32 +#define PRIxBIGD PRIx32 +#define PRIXBIGD PRIX32 + +/* MACROS TO DO MODULAR SQUARING AND MULTIPLICATION USING PRE-ALLOCATED TEMPS */ +/* Required lengths |y|=|t1|=|t2|=2*n, |m|=n; but final |y|=n */ +/* Square: y = (y * y) mod m */ +#define mpMODSQUARETEMP(y,m,n,t1,t2) do{mpSquare(t1,y,n);mpDivide(t2,y,t1,n*2,m,n);}while(0) +/* Mult: y = (y * x) mod m */ +#define mpMODMULTTEMP(y,x,m,n,t1,t2) do{mpMultiply(t1,x,y,n);mpDivide(t2,y,t1,n*2,m,n);}while(0) +/* Mult: w = (y * x) mod m */ +#define mpMODMULTXYTEMP(w,y,x,m,n,t1,t2) do{mpMultiply(t1,x,y,(n));mpDivide(t2,w,t1,(n)*2,m,(n));}while(0) + +#define mpNEXTBITMASK(mask, n) do{if(mask==1){mask=HIBITMASK;n--;}else{mask>>=1;}}while(0) + +int mpModulo(DIGIT_T r[], const DIGIT_T u[], size_t udigits, DIGIT_T v[], size_t vdigits); + +int mpModMult(DIGIT_T a[], const DIGIT_T x[], const DIGIT_T y[], DIGIT_T m[], size_t ndigits); + +int mpMultiply(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits); +DIGIT_T mpAdd(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits); +int mpDivide(DIGIT_T q[], DIGIT_T r[], const DIGIT_T u[], size_t udigits, DIGIT_T v[], size_t vdigits); +static int QhatTooBig(DIGIT_T qhat, DIGIT_T rhat, DIGIT_T vn2, DIGIT_T ujn2); +static DIGIT_T mpMultSub(DIGIT_T wn, DIGIT_T w[], const DIGIT_T v[], DIGIT_T q, size_t n); +DIGIT_T mpShiftLeft(DIGIT_T a[], const DIGIT_T *b, size_t shift, size_t ndigits); + + 
+void mpSetDigit(DIGIT_T a[], DIGIT_T d, size_t ndigits); + +int mpCompare(const DIGIT_T a[], const DIGIT_T b[], size_t ndigits); + + +DIGIT_T mpShiftRight(DIGIT_T a[], const DIGIT_T b[], size_t shift, size_t ndigits); +int spMultiply(uint32_t p[2], uint32_t x, uint32_t y); +uint32_t spDivide(uint32_t *pq, uint32_t *pr, const uint32_t u[2], uint32_t v); + +int mpSquare(DIGIT_T w[], const DIGIT_T x[], size_t ndigits); + +size_t mpBitLength(const DIGIT_T d[], size_t ndigits); + +size_t mpConvToOctets(const DIGIT_T a[], size_t ndigits, unsigned char *c, size_t nbytes); + +DIGIT_T mpShortDiv(DIGIT_T q[], const DIGIT_T u[], DIGIT_T v, + size_t ndigits); + +void mpSetEqual(DIGIT_T a[], const DIGIT_T b[], size_t ndigits); + + + +size_t mpSizeof(const DIGIT_T a[], size_t ndigits); + +volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits); + + +void mpPrintDecimal(const char *prefix, const DIGIT_T *a, size_t ndigits, const char *suffix); + + +size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes); +size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s); + +static size_t uiceil(double x); +volatile uint8_t zeroise_bytes(volatile void *v, size_t n); +void mpFail(char *msg); + +void mpPrintHex(const char *prefix, const DIGIT_T *a, size_t len, const char *suffix); + +int mpModExpO(DIGIT_T yout[], const DIGIT_T x[], const DIGIT_T e[], DIGIT_T m[], size_t ndigits); + +static size_t conv_to_base(const DIGIT_T a[], size_t ndigits, char *s, size_t smax, int base); + +int mpIsZero(const DIGIT_T a[], size_t ndigits); + +size_t mpConvToHex(const DIGIT_T a[], size_t ndigits, char *s, size_t smax); + +#endif /* big_int_test_h */ diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c @@ -0,0 +1,589 @@ +// +// File: hello.c +// +// Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which +// calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of +// floating point values. 
+// +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// + +//////////////////////////////////////////////////////////////////////////////// + + + +#include "big-int-test.h" + +#include "rsa-test.h" +#include "opencl-test.h" + + + +// +// +//#include <stdio.h> +//#include <string.h> +//#include <time.h> +//#include "more-tests.h" +// +// +//#define LUT_SIZE 2049 +// +//static struct bn lut[LUT_SIZE]; +// +///* +// * Performs bitwise Montgomery modular multiplication ( X*Y*R^(-1) mod M) +// * +// * Parameters: +// * x,y,m - bignums +// * mBits - # of bits in m +// * out - bignum result +// */ +// +//void montMult(struct bn* x, struct bn* y, struct bn* m, int mBits, struct bn* out){ +// +// struct bn t; +// bignum_init(&t); +// +// int i; +// for(i = mBits; i > 0 ; i--){ //efficient loop exit +// +// int t0Bit = bignum_getbit(&t,0); +// int xiBit = bignum_getbit(x, mBits - i); //loop exit requires subtraction here +// int y0Bit = bignum_getbit(y,0); +// int op = t0Bit + (xiBit * y0Bit); +// +// if(xiBit == 1){ +// bignum_add(&t, y, &t); +// } +// +// if(op == 1){ +// bignum_add(&t, m, &t); +// } +// +// bignum_rshift(&t,&t, 1); +// } +// +// if(bignum_cmp(&t, m) >= 0){ +// bignum_sub(&t,m,&t); +// } +// +// bignum_assign(out,&t); +//} +// +// +// +///*mod exp, no LUT */ +// +//void modExp(struct bn* x, struct bn* e, int eBits, struct bn* m, int mBits, struct bn* r2m, struct bn* out){ +// +// struct bn z,one; +// 
struct bn parr[3]; +// struct bn zarr[3]; +// +// //reduce z? +// bignum_from_int(&z, 1); +// montMult(&z,r2m,m, mBits, &zarr[1]); +// +// //reduce x, assign to p +// montMult(x,r2m,m, mBits,&parr[1]); +// +// struct bn tm; +// +// int i = 0; +// for(; i < eBits; i++){ +// +// bignum_assign(&tm, &parr[1]); +// montMult(&tm,&parr[1],m, mBits, &parr[2]); +// +// if(bignum_getbit(e, i) == 1){ +// montMult(&zarr[1],&parr[1],m,mBits,&zarr[2]); +// }else{ +// bignum_assign(&zarr[2],&zarr[1]); +// } +// +// //printf("num bits p: %d, num bits z: %d\n", bignum_numbits(&parr[1]), bignum_numbits(&zarr[1])); +// bignum_assign(&parr[1], &parr[2]); +// bignum_assign(&zarr[1], &zarr[2]); +// } +// +// bignum_from_int(&one, 1); +// montMult(&zarr[1], &one, m, mBits, out); +//} +// +// +///* Mod Exp using precomputed LUT */ +// +//void modExpLUT(struct bn* x, struct bn* e, int eBits, struct bn* m, int mBits, struct bn* r2m, struct bn* out){ +// +// struct bn z,one; +// struct bn parr[3]; +// struct bn zarr[3]; +// +// //reduce z? 
+// bignum_from_int(&z, 1); +// montMult(&z,r2m,m, mBits, &zarr[1]); +// +// bignum_assign(&parr[1],&lut[0]); +// +// int b = 1; +// int i = 0; +// for(; i < eBits; i++){ +// bignum_assign(&parr[2],&lut[i+1]); +// +// if(bignum_getbit(e, i) == 1){ +// montMult(&zarr[1],&lut[i],m,mBits,&zarr[2]); +// }else{ +// bignum_assign(&zarr[2],&zarr[1]); +// } +// +// //printf("num bits p: %d, num bits z: %d\n", bignum_numbits(&parr[1]), bignum_numbits(&zarr[1])); +// bignum_assign(&parr[1], &parr[2]); +// bignum_assign(&zarr[1], &zarr[2]); +// b++; +// } +// +// bignum_from_int(&one, 1); +// montMult(&zarr[1], &one, m, mBits, out); +//} +// +///* +//void genLUT(struct bn* valToDec, struct bn* m, int mBits, struct bn* r2m) { +// +// struct bn two; +// bignum_from_int(&two, 2); +// +// bignum_assign(&lut[0],valToDec); +// +// int i; +// for (i = 1; i < (LUT_SIZE + 1); i++) { +// struct bn temp; +// bignum_assign(&temp,&lut[i-1]); +// struct bn tmp, tmp1, tmp2; +// bignum_assign(&tmp2, &two); +// bignum_pow(&temp, &tmp2, &tmp); +// bignum_mod(&tmp, m, &tmp1); +// bignum_assign(&lut[i],&tmp1); +// } +// +// int a = 0; +// FILE *f; +// f = fopen("LUT.txt", "a"); +// +// for(; a < (mBits + 1); a++){ +// montMult(&lut[a],r2m,m, mBits, &lut[a]); +// int size = 8192; +// char str[size]; +// bignum_to_string(&lut[a],str, size); +// fprintf(f,"%s\n", str); +// } +// +// fclose(f); +//} +//*/ +// +//int parseLUT(int start, int num){ +// +// FILE *f; +// f = fopen("./LUT.txt", "r"); +// char str[num+1]; +// static char* zpad[8] = {"", "0", "00", "000", "0000", "00000", "000000", "0000000"}; +// //printf("Parse LUT: %d to %d\n", start, num + start); +// +// //puts(""); +// int i = 0; +// int a = 0; +// for(; i < (num + start); i++){ +// fscanf(f, "%s\n", str); +// +// if(i >= start){ +// +// int len = strlen(str); +// char* dup; +// if((len & 1) == 1){ +// sprintf(str, "%s%s", zpad[1], (dup = strdup(str))); +// len++; +// } +// +// int lenMod8 = len -((len >> 3) << 3); +// if(lenMod8 != 
0){ +// sprintf(str, "%s%s", zpad[lenMod8], (dup = strdup(str))); +// len += lenMod8; +// } +// bignum_from_string(&lut[i-start],str,len); +// } +// a++; +// } +// return i; +//} +// +//int main(void) { +// +// /* ----------- 12-bit Test -------------- */ +// +// struct bn n,e,d,r2m; +// bignum_from_int(&n, 3233); //modulus +// bignum_from_int(&e, 17); //public +// bignum_from_int(&d, 2753); //private +// bignum_from_int(&r2m, 1179); //R^2m mod M +// +// struct bn valToDec; //value to decrypt/encrypt +// bignum_from_int(&valToDec, 855); +// +// int nBits = bignum_numbits(&n); +// int dBits = bignum_numbits(&d); +// +// struct bn result; +// bignum_init(&result); +// +// clock_t before = clock(); +// modExp(&valToDec, &d, dBits, &n, nBits, &r2m, &result); +// clock_t after = clock(); +// +// double msec = (double)(after - before) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf("-------Test 1--------\n"); +// printf(" RSA Keysize: %3d [bits]\n",dBits); +// printf(" RSA Result: "); +// bignum_print(&result); +// printf("Time(no LUT): %.5f [sec]\n", msec); +// +// bignum_init(&result); +// +// //genLUT(&valToDec, &n, nBits, &r2m); +// int lutSeek = parseLUT(0,dBits+1); +// //printf("Lutseek: %d\n", lutSeek); +// +// before = clock(); +// modExpLUT(&valToDec, &d, dBits, &n, nBits, &r2m, &result); +// after = clock(); +// +// msec = (double)(after - before) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf(" RSA Result: "); +// bignum_print(&result); +// printf(" Time(LUT): %.5f [sec]\n", msec); +// +// /* ----------- 512-bit Modulus Test -------------- */ +// +// +// struct bn n512, pub, priv, v2Dec; +// int e1 = 65537; +// bignum_from_int(&pub,e1); +// char str1[] = "758463d46999c11496449db8dddd1e407de2e9a8f33612f454866acddd759da8173d4e3fe8c4eaf121f86f87ac8e1d58f54e2c6a80bcf8c404884795252224ad"; +// bignum_from_string(&n512,str1, 128); +// char str2[] = 
"68827b718d1452d4e72a5085f6b14dd516df34e3ae9fb94d96da0fa3d33e651cc244b0275a24ab0753b5c01eac2f8f0d700c587bbd6d8aeb6a4e99e1a9372655"; +// bignum_from_string(&priv,str2, 128); +// char str3[] = "45462476f31c3dfde5ac5fde4862d33d917f52255d80555b543584a32b71762a1fc719a341c0e925e9fff02a657764ae78b143d324cfc8892695c55801237885"; +// bignum_from_string(&v2Dec,str3,128); +// char r2ms[] = "47395beb0ae85106f9f8548040a9b165d9a37499d0d98a14a5bcd0b943d0549be18b2ced65bfc42db40331f3ec67faf9cccf19e51d3ef7a09e03ebb1855d5e5e"; +// bignum_from_string(&r2m,r2ms,128); +// +// nBits = bignum_numbits(&n512); +// dBits = bignum_numbits(&priv); +// +// bignum_init(&result); +// +// clock_t before1 = clock(); +// modExp(&v2Dec, &priv,dBits, &n512, nBits, &r2m, &result); +// clock_t after1 = clock(); +// +// double msec1 = (float)(after1 - before1) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf("-------Test 2--------\n"); +// printf(" RSA Keysize: %4d [bits]\n",dBits); +// printf(" RSA Result: "); +// bignum_print(&result); +// printf("Time(no LUT): %.5f [sec]\n", msec1); +// +// bignum_init(&result); +// +// //genLUT(&v2Dec, &n512, nBits, &r2m); +// lutSeek = parseLUT(lutSeek,dBits+1); +// +// +// before1 = clock(); +// modExpLUT(&v2Dec, &priv,dBits, &n512, nBits, &r2m, &result); +// after1 = clock(); +// +// msec1 = (float)(after1 - before1) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf(" RSA Result: "); +// bignum_print(&result); +// printf(" Time(LUT): %.5f [sec]\n", msec1); +// +// +// /* ----------- 1024-bit Modulus Test -------------- */ +// +// struct bn n1024, pub1, priv1, v2Dec1; +// +// bignum_from_int(&pub1,e1); +// char str4[] = "79eec1e33a41bf4592557bb1991b1830d4b445f55e3c9e683afc7a7f4abf05549a5e7ea811f8c3faf58450c2eafce1a25c5eb49821d0f930247ef2c6a6e426f01f91a6090292a433d84b93a1e6c5ba933c48f48923aa727f3de18c5fa4f1c0f7cce43cf407f94ee1d316d572b4428c7399158b76fa15f8b3dfbb36bd5f4bc5d1"; +// bignum_from_string(&n1024,str4, 256); +// char str5[] = 
"233c05371e4c85731b382c88438ffacb918b8e73bb099554d546c43728684ea805fbac69f0d78bfa671c17225c393b1269d2cc28f20cab1568566edd4cb8bd2f59e4b25f4b3787af54e002216bc42a34a2bdbd7bfe4ddab35dde5256fc7bfbc1b39f641c86e99950768214e69b18f806b0d200908484eb7cf6e817ab57400861"; +// bignum_from_string(&priv1,str5, 256); +// char str6[] = "4e29e645da6efddda068a8dcfceea970a5e86f7b518655cd3fba103d6899618a6b7caa86df16f28f7bdadbe2ad250794c9f20c9c42338624ab077f9f9ae3733a5c3bf8b4686b56cfe635be0010bf734fdc2a4f2ce5cf920fd4e79c6b7330a8fc2025e61d33dd8b3056390a2226d9d9eaec37f7aea1682f25120c260ecb165823"; +// bignum_from_string(&v2Dec1,str6,256); +// char r2ms1[] = "1a32ca1d9343f9ac08567501d91b0b29540e5e6914aaf46c460b92007b6264ca7a4be15e5346933dd2865022a2535729ea817c215f80714384b8235705b88bc3a295fe00ae789bd241d5816e5d617c362a2ed1bdd8b45ca26f558a987de829afe0253c33b6a7bab59c35429c29c4ab63a0ab16c7f8c4b9319f6f1947266522a5"; +// bignum_from_string(&r2m,r2ms1,256); +// +// dBits = bignum_numbits(&priv1); +// nBits = bignum_numbits(&n1024); +// +// bignum_init(&result); +// +// clock_t before2 = clock(); +// modExp(&v2Dec1, &priv1,dBits, &n1024, nBits, &r2m, &result); +// clock_t after2 = clock(); +// +// msec1 = (float)(after2 - before2) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf("-------Test 3--------\n"); +// printf(" RSA Keysize: %5d [bits]\n",dBits); +// printf(" RSA Result: "); +// bignum_print(&result); +// printf("Time(no LUT): %.5f [sec]\n", msec1); +// bignum_init(&result); +// +// //genLUT(&v2Dec1, &n1024, nBits, &r2m); +// lutSeek = parseLUT(lutSeek,dBits+2); +// before2 = clock(); +// modExpLUT(&v2Dec1, &priv1,dBits, &n1024, nBits,&r2m, &result); +// after2 = clock(); +// +// msec1 = (float)(after2 - before2) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf(" RSA Result: "); +// bignum_print(&result); +// printf(" Time(LUT): %.5f [sec]\n", msec1); +// +// +// /* ----------- 2048-bit Modulus Test -------------- */ +// +// struct bn n2048, pub2, priv2, v2Dec2; 
+// +// bignum_from_int(&pub2,e1); +// char str7[] = "bc07d529450214ef63a8d61966987e8ca0594d9a7ec4f1881117b4f8ecbdc74b8769f6c98bfe931c9474116be8bd36527acfd95f6633d12cc8a960ab3d3e7a0b4b3e4990b594ee61af3b56315337501225525fb997b65c38118d614601dcb8bd631673a510498f2c3dab44d723d8b6daa697d0108e7fcb4d27525f386e7fcd9ce29c4ab12c4258aa77872259a25804791a1eaef54b65226ec84765442ac839db30467d86910e700d802807de1f4fef5235738d66359cb0a2707cb9cd90e90bb1f2d0d807aafbd048b1ddbb156d4984cfbbaa9a435b9230d213140dd5be64b5e594945d474665eaf5267fc598a5f75b99f83b029971b80c4149891d43abe62b95"; +// bignum_from_string(&n2048,str7, 512); +// char str8[] = "9b78ea83264133684182400d5eaca6aec68330cc97176712f7f71f3758210f61df44f9beead78372753987922f2e0c75a480aa1edc95e9d65ad0da529ce044ef83b6ac03507125ae75c2dd61098ac9d54730d65fd21702278633dd8392549c18548f22ee100a92aca50d316da68131a897691dac22f77df57c96fa8ee1a7212db313396410a5c9c8a31f6f940724cff2b2db5eb078eedad92b6ff29a8636fcd370e99773e96168f34839693f84b7a083597bfbe0f674c79b2348b038ca730cada30bcf2dd9cde27dd555891d3cc10b7831b23e7cda163570635727f11d569492a201f55c56d9a92d46f71b6ecea30f28f8c040f834a2da43f72a1ec927df9441"; +// bignum_from_string(&priv2,str8, 512); +// char str9[] = "4bd1a139b2ae5bcae58410ae32ce65e41ef226d30bb2d020e1cb02387f13985d4f18d154444954a4831a26870d1671f54f3ff87efe3adc66cac098160a524674740948d6ba8466054cce1d27018bbd5dd9c4b58def7d62cf8c0d6621ad846324b72a0414c56843075c4199c5963f55977ea6437a501afe3eebf150b1e2cbbbcde4be89e1ce8e72f8a334297224418e29ad44882a99ba59c5eb481b5faeeebd423b5bdab6c7edb288e8ab42f01a18cef3521c3cbcad8fde05e2f189070725c13716112b7497bb27250fe4141b41de67e0b0fe1763e0831ecf692ad1ff18e5f0186a6e7729ec84e7b9e2483838be73cbe4fea67fa186b329bfd1434dad528524a4"; +// bignum_from_string(&v2Dec2,str9,512); +// char r2ms2[]= 
"10e2f70a5f5ea34371cd7f6d36ce95604746f2aa503bc45369201212a4006df4433827b085890ed3a614058df7af4caa9a988bb5cbc49179e0a4e76b046926b3f700532e1ed1d191985176c2cd7f9600f45eb96323d975060c44f06ef3bdaa220957e7905c5641276e7752e7e503f930cb49a4abe90cae46270a41e17964206bd6edaa7943b32237d2bfa4063060b388424944ec21c7c2f3bc29554214dee86c848116fc1fd28b60b0b438aa8bc8303c0788ea216bace026f78c09aa10b139a5ee415aa73888ac15157ab9a355eda90b7838e8cddb44a626d1c17a203eaf3c64be524f077df6892984a7198b9c3ba31228bb49259162572747ec51e5b49849ae"; +// bignum_from_string(&r2m,r2ms2,512); +// +// dBits = bignum_numbits(&priv2); +// nBits = bignum_numbits(&n2048); +// +// bignum_init(&result); +// +// clock_t before3 = clock(); +// modExp(&v2Dec2, &priv2,dBits, &n2048, nBits, &r2m, &result); +// clock_t after3 = clock(); +// +// msec1 = (float)(after3 - before3) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf("-------Test 4--------\n"); +// printf(" RSA Keysize: %6d [bits]\n",dBits); +// printf(" RSA Result: "); +// bignum_print(&result); +// printf("Time(no LUT): %.5f [sec]\n", msec1); +// bignum_init(&result); +// +// //genLUT(&v2Dec2, &n2048, nBits, &r2m); +// parseLUT(lutSeek, dBits+1); +// before3 = clock(); +// modExpLUT(&v2Dec2, &priv2,dBits, &n2048, nBits, &r2m, &result); +// after3 = clock(); +// +// msec1 = (float)(after3 - before3) / CLOCKS_PER_SEC; +// +// //print result and timing +// printf(" RSA Result: "); +// bignum_print(&result); +// printf(" Time(LUT): %.5f [sec]\n", msec1); +// +// +// +// return 0; +//} +// +// +// + + + + +int main(int argc, char** argv) +{ + + //opencl_tests(); + + rsa_tests(); + + // montgomery_test(); + + return 0; +} + + + +// +// +//#define NEED_LIBGCRYPT_VERSION "1.10.2" +// +//int main(int argc, char** argv) +//{ +// +// gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL); +// +// /* Version check should be the very first call because it +// makes sure that important subsystems are initialized. 
+// #define NEED_LIBGCRYPT_VERSION to the minimum required version. */ +// if (!gcry_check_version (NEED_LIBGCRYPT_VERSION)) +// { +// fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n", +// NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL)); +// exit (2); } +// /* Disable secure memory. */ +// gcry_control (GCRYCTL_DISABLE_SECMEM, 0); +// /* ... If required, other initialization goes here. */ +// /* Tell Libgcrypt that initialization has completed. */ +// gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0); +// +// +// char *template = "(genkey(rsa(nbits 4:2048)))"; +// gcry_sexp_t parms; +// +// gcry_sexp_new(&parms, template, strlen(template), 1); +// +// // contains key material +// gcry_sexp_t key; +// +// gcry_pk_genkey(&key,parms); +// +// // show_sexp("\n", key); +// +// // create a large number, we want to encrypt it +// +// +// char *val = "1234567890ABCDEF"; +// gcry_mpi_t mpi = gcry_mpi_new((int)strlen(val) * 8); +// size_t scanned = 0; +// +// gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned); +// +// gcry_sexp_t toSign; +// size_t errOff = 0; +// char *dataformat = "(data (flags raw) (value %m))"; +// +// gcry_sexp_build(&toSign,&errOff,dataformat,mpi); +// +// // show_sexp("\n", toSign); +// +// // MARK: RSA verification +// +// // use gcry_rsa_sign – without padding? +// gcry_sexp_t resSign; +// +// // clock_t start_0 = clock(); +// /*Do something*/ +// +// gcry_pk_sign(&resSign, toSign, key); +// +// // clock_t end_0 = clock(); +// // float seconds_0 = (float)(end_0 - start_0) / CLOCKS_PER_SEC; +// +// // printf("\nSign 0 (gcrpypt): %f sec\n", seconds_0); +// +// +// +// // show_sexp("\n", resSign); +// +// // measure time +// +// // do the same thing with our bigNum library – do we even get the same signature? 
+// +// // these must be freed manually +// gcry_mpi_t n_mpi; +// gcry_mpi_t e_mpi; +// gcry_mpi_t d_mpi; +// +// gcry_sexp_extract_param(key,NULL,"n e d",&n_mpi, &e_mpi, &d_mpi, NULL); +// +// gcry_mpi_t sig_mpi; +// +// gcry_sexp_extract_param(resSign,NULL,"s",&sig_mpi, NULL); +// +// +// // may be a lot shorter – these will contain the numbers in HEX string form – for use in my bigNum +// unsigned char *n = malloc(2048); +// unsigned char *e = malloc(2048); +// unsigned char *d = malloc(2048); +// size_t nL = 0; +// +// // check returns +// gcry_mpi_print(GCRYMPI_FMT_HEX,n,2048,&nL,n_mpi); +// gcry_mpi_print(GCRYMPI_FMT_HEX,e,2048,&nL,e_mpi); +// gcry_mpi_print(GCRYMPI_FMT_HEX,d,2048,&nL,d_mpi); +// +// unsigned char *sgn = malloc(2048); +// gcry_mpi_print(GCRYMPI_FMT_HEX,sgn,2048,&nL,sig_mpi); +// +// +// test(e,d,n); +// +// +// return 0; +//} +// +// +// +// diff --git a/source/opencl-test.c b/source/opencl-test.c @@ -0,0 +1,211 @@ +// +// opencl-test.c +// hello +// +// Created by Cedric Zwahlen on 28.09.2023. 
+// + +#include "opencl-test.h" + +#define DATA_SIZE (1024) + +int opencl_tests(void) { + + int err; // error code returned from api calls + + float data[DATA_SIZE]; // original data set given to device + float results[DATA_SIZE]; // results returned from device + unsigned int correct; // number of correct results returned + + size_t global; // global domain size for our calculation + size_t local; // local domain size for our calculation + + cl_device_id device_id; // compute device id + cl_context context; // compute context + cl_command_queue commands; // compute command queue + cl_program program; // compute program + cl_kernel kernel; // compute kernel + + cl_mem input; // device memory used for the input array + cl_mem output; // device memory used for the output array + + // Fill our data set with random float values + // + int i = 0; + unsigned int count = DATA_SIZE; + for(i = 0; i < count; i++) + data[i] = rand() / (float)RAND_MAX; + + // Connect to a compute device + // + int gpu = 1; + err = clGetDeviceIDs(NULL, gpu ? 
CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to create a device group!\n"); + return EXIT_FAILURE; + } + + size_t retSize = 0; + clGetDeviceInfo(device_id, CL_DRIVER_VERSION, 0, NULL, &retSize); + + char driver_version[retSize]; + clGetDeviceInfo(device_id, CL_DRIVER_VERSION, retSize, &driver_version, &retSize); + + //char *driver_version; + //clGetDeviceInfo(device_id, CL_DRIVER_VERSION, NULL, &driver_version, NULL); + printf("%s\n", driver_version); + + + // Create a compute context + // + context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); + if (!context) + { + printf("Error: Failed to create a compute context!\n"); + return EXIT_FAILURE; + } + + // Create a command commands + // + commands = clCreateCommandQueue(context, device_id, 0, &err); + if (!commands) + { + printf("Error: Failed to create a command commands!\n"); + return EXIT_FAILURE; + } + + // get the kernel from a file instead of a constant + + + FILE *fp = fopen("rsa-kernel.cl", "r"); + fseek(fp, 0L, SEEK_END); + size_t sz = ftell(fp); + rewind(fp); + + char *kernelBuf = malloc(sz); + fread(kernelBuf, sizeof(char), sz, fp); + fclose(fp); + + // Create the compute program from the source buffer + // + //program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); + program = clCreateProgramWithSource(context, 1, (const char **) & kernelBuf, NULL, &err); + if (!program) + { + printf("Error: Failed to create compute program!\n"); + return EXIT_FAILURE; + } + + // Build the program executable + // + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + size_t len; + char buffer[2048]; + + printf("Error: Failed to build program executable!\n"); + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); + printf("%s\n", buffer); + exit(1); + } + + // Create the compute kernel in the program we wish to run + // + kernel = 
clCreateKernel(program, "square", &err); + if (!kernel || err != CL_SUCCESS) + { + printf("Error: Failed to create compute kernel!\n"); + exit(1); + } + + // Create the input and output arrays in device memory for our calculation + // + input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL); + output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL); + if (!input || !output) + { + printf("Error: Failed to allocate device memory!\n"); + exit(1); + } + + // Write our data set into the input array in device memory + // + err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + + // Set the arguments to our compute kernel + // + err = 0; + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); + err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); + err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel arguments! %d\n", err); + exit(1); + } + + // Get the maximum work group size for executing the kernel on the device + // + err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to retrieve kernel work group info! 
%d\n", err); + exit(1); + } + + // Execute the kernel over the entire range of our 1d input data set + // using the maximum number of work group items for this device + // + global = count; + err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + if (err) + { + printf("Error: Failed to execute kernel!\n"); + return EXIT_FAILURE; + } + + // Wait for the command commands to get serviced before reading back results + // + clFinish(commands); + + // Read back the results from the device to verify the output + // + err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + printf("Error: Failed to read output array! %d\n", err); + exit(1); + } + + // Validate our results + // + correct = 0; + for(i = 0; i < count; i++) + { + if(results[i] == data[i] * data[i]) + correct++; + } + + + // Print a brief summary detailing the results + // + printf("Computed '%d/%d' correct values!\n", correct, count); + + // Shutdown and cleanup + // + clReleaseMemObject(input); + clReleaseMemObject(output); + clReleaseProgram(program); + clReleaseKernel(kernel); + clReleaseCommandQueue(commands); + clReleaseContext(context); + + +} diff --git a/source/opencl-test.h b/source/opencl-test.h @@ -0,0 +1,25 @@ +// +// opencl-test.h +// hello +// +// Created by Cedric Zwahlen on 28.09.2023. +// + +#ifndef opencl_test_h +#define opencl_test_h + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <OpenCL/opencl.h> + +#include "big-int-test.h" + +int opencl_tests(void); + +#endif /* opencl_test_h */ diff --git a/source/rsa-test.c b/source/rsa-test.c @@ -0,0 +1,276 @@ +// +// rsa-test.c +// hello +// +// Created by Cedric Zwahlen on 28.09.2023. 
+// + +#include "rsa-test.h" +#include "big-int-test.h" + + +#include "ctype.h" +#include "time.h" + + + +#define NEED_LIBGCRYPT_VERSION "1.10.2" + + + +void rsa_tests(void) { + + // MARK: UNSAFE init + + // consider disabling optimizations, since they dont make for a fair comparison + + gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL); + + /* Version check should be the very first call because it + makes sure that important subsystems are initialized. + #define NEED_LIBGCRYPT_VERSION to the minimum required version. */ + if (!gcry_check_version (NEED_LIBGCRYPT_VERSION)) + { + fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n", + NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL)); + exit (2); } + /* Disable secure memory. */ + gcry_control (GCRYCTL_DISABLE_SECMEM, 0); + /* ... If required, other initialization goes here. */ + /* Tell Libgcrypt that initialization has completed. */ + gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0); + + + char *template = "(genkey(rsa(nbits 4:2048)))"; + gcry_sexp_t parms; + + gcry_sexp_new(&parms, template, strlen(template), 1); + + // contains key material + gcry_sexp_t key; + + gcry_pk_genkey(&key,parms); + + // show_sexp("\n", key); + + // create a large number, we want to encrypt it + + + char *val = "1234567890ABCDEF1234567890969"; + gcry_mpi_t mpi = gcry_mpi_new((int)strlen(val) * 8); + size_t scanned = 0; + + gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned); + + gcry_sexp_t toSign; + size_t errOff = 0; + char *dataformat = "(data (flags raw) (value %m))"; + + gcry_sexp_build(&toSign,&errOff,dataformat,mpi); + + // show_sexp("\n", toSign); + + // MARK: RSA verification + + // use gcry_rsa_sign – without padding? + gcry_sexp_t resSign; + + + + gcry_pk_sign(&resSign, toSign, key); + + + // show_sexp("\n", resSign); + + // measure time + + // do the same thing with our bigNum library – do we even get the same signature? 
+ + // these must be freed manually + gcry_mpi_t n_mpi; + gcry_mpi_t e_mpi; + gcry_mpi_t d_mpi; + + gcry_sexp_extract_param(key,NULL,"n e d",&n_mpi, &e_mpi, &d_mpi, NULL); + + gcry_mpi_t sig_mpi; + + gcry_sexp_extract_param(resSign,NULL,"s",&sig_mpi, NULL); + + + // may be a lot shorter – these will contain the numbers in HEX string form – for use in my bigNum + unsigned char *n = malloc(2048); + unsigned char *e = malloc(2048); + unsigned char *d = malloc(2048); + size_t nL = 0; + + // check returns + gcry_mpi_print(GCRYMPI_FMT_HEX,n,2048,&nL,n_mpi); + gcry_mpi_print(GCRYMPI_FMT_HEX,e,2048,&nL,e_mpi); + gcry_mpi_print(GCRYMPI_FMT_HEX,d,2048,&nL,d_mpi); + + unsigned char *sgn = malloc(2048); + gcry_mpi_print(GCRYMPI_FMT_HEX,sgn,2048,&nL,sig_mpi); + + // printf("%s",n); + + // use 'real time' – not clock time + + + + bigNum_tests(n, val, d); + + + struct timespec t1, t2; + + clock_gettime(CLOCK_REALTIME, &t1); + + + + if (verify(sgn, e, n, val)) { + + printf("\nverification failed\n"); + + } else { + + printf("\nverification successful\n"); + + } + + clock_gettime(CLOCK_REALTIME, &t2); + + + float seconds_1 = (t2.tv_nsec - t1.tv_nsec) / 1000; + + printf("\nSign 1 (my algorithm): %f micro seconds\n", seconds_1); + + + clock_gettime(CLOCK_REALTIME, &t1); + // maybe we want to do more here + if (gcry_pk_verify(resSign, toSign, key)) { + + printf("\nverification failed\n"); + + } else { + + printf("\nverification successful\n"); + + } + + clock_gettime(CLOCK_REALTIME, &t2); + float seconds_0 = (t2.tv_nsec - t1.tv_nsec) / 1000; + printf("\nSign 0 (gcrpypt): %f micro seconds\n", seconds_0); + + // try to put it onto the gpu + + return; + +} + + +//void bigNum_tests(void) { +void bigNum_tests(unsigned char* nn,unsigned char* ee,unsigned char* dd) { + + + // MARK: BIG NUM TESTs + + DIGIT_T N [MAX_ALLOC_SIZE*2]; + DIGIT_T e [MAX_ALLOC_SIZE*2]; + DIGIT_T d [MAX_ALLOC_SIZE*2]; + + DIGIT_T res [MAX_ALLOC_SIZE*2]; + + mpSetZero(N, MAX_ALLOC_SIZE*2); + mpSetZero(e, 
MAX_ALLOC_SIZE*2); + mpSetZero(d, MAX_ALLOC_SIZE*2); + + mpSetZero(res, MAX_ALLOC_SIZE*2); + /* + + char* nn = "E08973398DD8F5F5E88776397F4EB005BB5383DE0FB7ABDC7DC775290D052E6D12DFA68626D4D26FAA5829FC97ECFA82510F3080BEB1509E4644F12CBBD832CFC6686F07D9B060ACBEEE34096A13F5F7050593DF5EBA3556D961FF197FC981E6F86CEA874070EFAC6D2C749F2DFA553AB9997702A648528C4EF357385774575F"; + + char* ee = "010001"; + + char* dd = "A403C327477634346CA686B57949014B2E8AD2C862B2C7D748096A8B91F736F275D6E8CD15906027314735644D95CD6763CEB49F56AC2F376E1CEE0EBF282DF439906F34D86E085BD5656AD841F313D72D395EFE33CBFF29E4030B3D05A28FB7F18EA27637B07957D32F2BDE8706227D04665EC91BAF8B1AC3EC9144AB7F21"; + */ + mpConvFromHex(N, strlen(nn), nn); + mpConvFromHex(e, strlen(ee), ee); + mpConvFromHex(d, strlen(dd), dd); + + size_t sz_n = mpSizeof(N, MAX_ALLOC_SIZE*2); + size_t sz_d = mpSizeof(d, MAX_ALLOC_SIZE*2); + + //mpModMult(res, e, d, N, max(sz_d,sz_n)); // that works :) + + mpModExpO(res, e, d, N, max(sz_d,sz_n)); + + size_t sz_res = mpSizeof(res, MAX_ALLOC_SIZE*2); + + char* pref = "\nHEX:\n"; + char* suf = "\n"; + // mpPrintHex(pref, res, sz_res, suf); + + + +} + +int verify(unsigned char* sign, unsigned char* ee, unsigned char* nn, unsigned char* mm) { + + + DIGIT_T N [MAX_ALLOC_SIZE*2]; + DIGIT_T e [MAX_ALLOC_SIZE*2]; + DIGIT_T s [MAX_ALLOC_SIZE*2]; + + DIGIT_T res [MAX_ALLOC_SIZE*2]; + + mpSetZero(N, MAX_ALLOC_SIZE*2); + mpSetZero(e, MAX_ALLOC_SIZE*2); + mpSetZero(s, MAX_ALLOC_SIZE*2); + + mpSetZero(res, MAX_ALLOC_SIZE*2); + + mpConvFromHex(N, strlen(nn), nn); + mpConvFromHex(e, strlen(ee), ee); + mpConvFromHex(s, strlen(sign), sign); + + size_t sz_n = mpSizeof(N, MAX_ALLOC_SIZE*2); + size_t sz_s = mpSizeof(s, MAX_ALLOC_SIZE*2); + + //mpModMult(res, e, d, N, max(sz_d,sz_n)); // that works :) + + mpModExpO(res, s, e, N, max(sz_s,sz_n)); + + size_t sz_res = mpSizeof(res, MAX_ALLOC_SIZE*2); + + int sz_mm = strlen(mm) + 2; + + unsigned char comp[sz_mm]; + + mpConvToHex(res, sz_res, comp, sz_mm); + + 
upper(comp); + + return strcmp(comp, mm); +} + +static void show_sexp(const char *prefix, gcry_sexp_t a) { + char *buf; + size_t size; + + if (prefix) + fputs(prefix, stderr); + size = gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, NULL, 0); + buf = gcry_xmalloc(size); + + gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, buf, size); + fprintf(stderr, "%.*s", (int) size, buf); + gcry_free(buf); + } + +static void upper(unsigned char* str) { + + for(int i = 0; str[i]; i++){ + str[i] = toupper(str[i]); + } + +} diff --git a/source/rsa-test.h b/source/rsa-test.h @@ -0,0 +1,27 @@ +// +// rsa-test.h +// hello +// +// Created by Cedric Zwahlen on 28.09.2023. +// + +#ifndef rsa_test_h +#define rsa_test_h + +#include <stdio.h> + +#include <gcrypt.h> + +void rsa_tests(void); + +static void show_sexp(const char *prefix, gcry_sexp_t a); + +void bigNum_tests(unsigned char* n, unsigned char* e, unsigned char* d); + +void montgomery_test(void); + +int verify(unsigned char* sign, unsigned char* ee, unsigned char* nn, unsigned char* mm); + +static void upper(unsigned char* str); + +#endif /* rsa_test_h */ diff --git a/xcode/.DS_Store b/xcode/.DS_Store Binary files differ. diff --git a/xcode/ReadMe.txt b/xcode/ReadMe.txt @@ -0,0 +1 @@ +### OpenCL Hello World Example ###===========================================================================DESCRIPTION:A simple "Hello World" compute example showing basic usage of OpenCL whichcalculates the mathematical square (X[i] = pow(X[i],2)) for a buffer offloating point values.For simplicity, this example is intended to be run from the command line.If run from within XCode, open the Run Log (Command-Shift-R) to see the output. 
Alternatively, run the applications from within a Terminal.app session to launch from the command line.===========================================================================BUILD REQUIREMENTS:Mac OS X v10.6 or later===========================================================================RUNTIME REQUIREMENTS:Mac OS X v10.6 or laterTo use the GPU as a compute device, use one of the following devices:- MacBook Pro w/NVidia GeForce 8600M - Mac Pro w/NVidia GeForce 8800GT===========================================================================PACKAGING LIST:ReadMe.txthello.chello.xcodeproj===========================================================================CHANGES FROM PREVIOUS VERSIONS:Version 1.0- First version.===========================================================================Copyright (C) 2008 Apple Inc. All rights reserved. +\ No newline at end of file diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj @@ -0,0 +1,328 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 54; + objects = { + +/* Begin PBXBuildFile section */ + 6A8A795D2A89357400116D7D /* rsa-kernel.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795C2A89357400116D7D /* rsa-kernel.cl */; }; + 6A8A795F2A89672700116D7D /* modexp.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* modexp.cl */; }; + 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */; }; + 6AF748822ADADF4500D58E08 /* big-int-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487D2ADADF4500D58E08 /* big-int-test.c */; }; + 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* rsa-test.c */; }; + 6AF748862ADADFAD00D58E08 /* opencl-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748852ADADFAD00D58E08 /* opencl-test.c */; }; + C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C3770EFC0E6F1138009A5A77 /* OpenCL.framework */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + C39444690DAFF5A0008FFE68 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 16; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-verify"; sourceTree = BUILT_PRODUCTS_DIR; }; + 6A8A795C2A89357400116D7D /* rsa-kernel.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = "rsa-kernel.cl"; sourceTree = "<group>"; }; + 6A8A795E2A89672700116D7D /* modexp.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = modexp.cl; sourceTree = "<group>"; }; + 
6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "lib-gpu-verify.c"; path = "../source/lib-gpu-verify.c"; sourceTree = "<group>"; }; + 6AF7487B2ADADF4500D58E08 /* big-int-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "big-int-test.h"; path = "../source/big-int-test.h"; sourceTree = "<group>"; }; + 6AF7487D2ADADF4500D58E08 /* big-int-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "big-int-test.c"; path = "../source/big-int-test.c"; sourceTree = "<group>"; }; + 6AF7487F2ADADF4500D58E08 /* rsa-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "rsa-test.c"; path = "../source/rsa-test.c"; sourceTree = "<group>"; }; + 6AF748802ADADF4500D58E08 /* rsa-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "rsa-test.h"; path = "../source/rsa-test.h"; sourceTree = "<group>"; }; + 6AF748842ADADFAD00D58E08 /* opencl-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "opencl-test.h"; path = "../source/opencl-test.h"; sourceTree = "<group>"; }; + 6AF748852ADADFAD00D58E08 /* opencl-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "opencl-test.c"; path = "../source/opencl-test.c"; sourceTree = "<group>"; }; + C3770EFC0E6F1138009A5A77 /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 466E0F5D0C932E1A00ED01DB /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */, + ); + 
runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 466E0F490C93291B00ED01DB = { + isa = PBXGroup; + children = ( + C3770EF30E6F10CF009A5A77 /* Frameworks */, + C3770EF10E6F10BB009A5A77 /* Sources */, + 466E0F600C932E1A00ED01DB /* Products */, + ); + sourceTree = "<group>"; + }; + 466E0F600C932E1A00ED01DB /* Products */ = { + isa = PBXGroup; + children = ( + 466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */, + ); + name = Products; + sourceTree = "<group>"; + }; + 6A984F162AC5B18A00F530FD /* Headers */ = { + isa = PBXGroup; + children = ( + 6AF748802ADADF4500D58E08 /* rsa-test.h */, + 6AF7487B2ADADF4500D58E08 /* big-int-test.h */, + 6AF748842ADADFAD00D58E08 /* opencl-test.h */, + ); + name = Headers; + sourceTree = "<group>"; + }; + C3770EF10E6F10BB009A5A77 /* Sources */ = { + isa = PBXGroup; + children = ( + 6A984F162AC5B18A00F530FD /* Headers */, + 6A8A795C2A89357400116D7D /* rsa-kernel.cl */, + 6A8A795E2A89672700116D7D /* modexp.cl */, + 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */, + 6AF7487D2ADADF4500D58E08 /* big-int-test.c */, + 6AF7487F2ADADF4500D58E08 /* rsa-test.c */, + 6AF748852ADADFAD00D58E08 /* opencl-test.c */, + ); + name = Sources; + sourceTree = "<group>"; + }; + C3770EF30E6F10CF009A5A77 /* Frameworks */ = { + isa = PBXGroup; + children = ( + C3770EFC0E6F1138009A5A77 /* OpenCL.framework */, + ); + name = Frameworks; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 466E0F5E0C932E1A00ED01DB /* lib-gpu-verify */ = { + isa = PBXNativeTarget; + buildConfigurationList = 466E0F640C932E1A00ED01DB /* Build configuration list for PBXNativeTarget "lib-gpu-verify" */; + buildPhases = ( + 466E0F5C0C932E1A00ED01DB /* Sources */, + 466E0F5D0C932E1A00ED01DB /* Frameworks */, + C39444690DAFF5A0008FFE68 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "lib-gpu-verify"; + productName = hello; + productReference = 
466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 466E0F4B0C93291B00ED01DB /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1500; + }; + buildConfigurationList = 466E0F4E0C93291B00ED01DB /* Build configuration list for PBXProject "lib-gpu-verify" */; + compatibilityVersion = "Xcode 2.4"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + English, + Japanese, + French, + German, + ); + mainGroup = 466E0F490C93291B00ED01DB; + productRefGroup = 466E0F600C932E1A00ED01DB /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 466E0F5E0C932E1A00ED01DB /* lib-gpu-verify */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 466E0F5C0C932E1A00ED01DB /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */, + 6A8A795D2A89357400116D7D /* rsa-kernel.cl in Sources */, + 6A8A795F2A89672700116D7D /* modexp.cl in Sources */, + 6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */, + 6AF748862ADADFAD00D58E08 /* opencl-test.c in Sources */, + 6AF748822ADADF4500D58E08 /* big-int-test.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 466E0F4C0C93291B00ED01DB /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; 
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ../../inc; + LIBRARY_SEARCH_PATHS = ../../lib; + ONLY_ACTIVE_ARCH = YES; + }; + name = Debug; + }; + 466E0F4D0C93291B00ED01DB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + DEAD_CODE_STRIPPING = YES; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 
+ GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + HEADER_SEARCH_PATHS = ../../inc; + LIBRARY_SEARCH_PATHS = ../../lib; + }; + name = Release; + }; + 466E0F620C932E1A00ED01DB /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = YES; + COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; + GCC_DYNAMIC_NO_PIC = NO; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + HEADER_SEARCH_PATHS = ( + ../../inc, + /usr/local/include, + ); + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1)", + ); + LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1 = "\"$(SRCROOT)/../lib\" \"/usr/local/lib\""; + MACOSX_DEPLOYMENT_TARGET = 13.5; + OTHER_LDFLAGS = "-lgcrypt"; + PRODUCT_NAME = "lib-gpu-verify"; + SYSTEM_FRAMEWORK_SEARCH_PATHS = ""; + USE_HEADERMAP = NO; + ZERO_LINK = YES; + }; + name = Debug; + }; + 466E0F630C932E1A00ED01DB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = YES; + COPY_PHASE_STRIP = YES; + DEAD_CODE_STRIPPING = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + HEADER_SEARCH_PATHS = ( + ../../inc, + /usr/local/include, + ); + INSTALL_PATH = /usr/local/bin; + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1)", + ); + LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1 = "\"$(SRCROOT)/../lib\" \"/usr/local/lib\""; + MACOSX_DEPLOYMENT_TARGET = 13.5; + OTHER_LDFLAGS = "-lgcrypt"; + PRODUCT_NAME = "lib-gpu-verify"; + USE_HEADERMAP = NO; + ZERO_LINK = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 466E0F4E0C93291B00ED01DB /* Build configuration list for PBXProject "lib-gpu-verify" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 
466E0F4C0C93291B00ED01DB /* Debug */, + 466E0F4D0C93291B00ED01DB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 466E0F640C932E1A00ED01DB /* Build configuration list for PBXNativeTarget "lib-gpu-verify" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 466E0F620C932E1A00ED01DB /* Debug */, + 466E0F630C932E1A00ED01DB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 466E0F4B0C93291B00ED01DB /* Project object */; +} diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Workspace + version = "1.0"> + <FileRef + location = "self:/Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/lib-gpu-verify.xcodeproj"> + </FileRef> +</Workspace> diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>IDEDidComputeMac32BitWarning</key> + <true/> +</dict> +</plist> diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate Binary files differ. 
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme b/xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme @@ -0,0 +1,79 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "1500" + version = "1.7"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + <BuildActionEntries> + <BuildActionEntry + buildForTesting = "YES" + buildForRunning = "YES" + buildForProfiling = "YES" + buildForArchiving = "YES" + buildForAnalyzing = "YES"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB" + BuildableName = "lib-gpu-verify" + BlueprintName = "lib-gpu-verify" + ReferencedContainer = "container:lib-gpu-verify.xcodeproj"> + </BuildableReference> + </BuildActionEntry> + </BuildActionEntries> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES" + shouldAutocreateTestPlan = "YES"> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "YES" + customWorkingDirectory = "/Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES" + viewDebuggingEnabled = "No"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB" + BuildableName = "lib-gpu-verify" + BlueprintName = "lib-gpu-verify" + ReferencedContainer = "container:lib-gpu-verify.xcodeproj"> + </BuildableReference> + 
</BuildableProductRunnable> + </LaunchAction> + <ProfileAction + buildConfiguration = "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB" + BuildableName = "lib-gpu-verify" + BlueprintName = "lib-gpu-verify" + ReferencedContainer = "container:lib-gpu-verify.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -0,0 +1,684 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Bucket + uuid = "6398D026-4CA4-45B6-AB2F-69FCDCABE312" + type = "1" + version = "2.0"> + <Breakpoints> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "hello.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "474" + endingLineNumber = "474" + landmarkName = "main(argc, argv)" + landmarkType = "9"> + <Locations> + <Location + uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8 - 221db7784ae30d96" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "main" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/hello.c" + 
startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "339" + endingLineNumber = "339" + offsetFromSymbolStart = "2771"> + </Location> + <Location + uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8 - 221db7784ae30d96" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "main" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/hello.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "339" + endingLineNumber = "339" + offsetFromSymbolStart = "2757"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "65" + endingLineNumber = "65" + landmarkName = "rsa_tests()" + landmarkType = "9"> + <Locations> + <Location + uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298 - f37c0f4c30700625" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "56" + endingLineNumber = "56" + offsetFromSymbolStart = "246"> + </Location> + <Location + uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298 - f37c0f4c307007c6" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" 
+ usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "57" + endingLineNumber = "57" + offsetFromSymbolStart = "270"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "173" + endingLineNumber = "173" + landmarkName = "rsa_tests()" + landmarkType = "9"> + <Locations> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - 706f0ea9b024a22d" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "bigNum_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "111" + endingLineNumber = "111" + offsetFromSymbolStart = "31"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700c50" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "99" + endingLineNumber = "99" + offsetFromSymbolStart = "584"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - 
f37c0f4c30700d48" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "107" + endingLineNumber = "107" + offsetFromSymbolStart = "601"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700d69" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "108" + endingLineNumber = "108" + offsetFromSymbolStart = "601"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700f86" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "121" + endingLineNumber = "121" + offsetFromSymbolStart = "642"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009f4" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + 
startingLineNumber = "135" + endingLineNumber = "135" + offsetFromSymbolStart = "794"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009f4" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "135" + endingLineNumber = "135" + offsetFromSymbolStart = "797"> + </Location> + <Location + uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009b6" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "rsa_tests" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "137" + endingLineNumber = "137" + offsetFromSymbolStart = "797"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "260" + endingLineNumber = "260" + landmarkName = "verify(sign, ee, nn, mm)" + landmarkType = "9"> + <Locations> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0b39" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = 
"file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "214" + endingLineNumber = "214" + offsetFromSymbolStart = "492"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0bbd" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "218" + endingLineNumber = "218" + offsetFromSymbolStart = "523"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0401" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "222" + endingLineNumber = "222" + offsetFromSymbolStart = "535"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e07bc" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "249" + endingLineNumber = "249" + offsetFromSymbolStart = "535"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e00c6" + shouldBeEnabled = "Yes" + ignoreCount = "0" + 
continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "255" + endingLineNumber = "255" + offsetFromSymbolStart = "535"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e014a" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "259" + endingLineNumber = "259" + offsetFromSymbolStart = "535"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0108" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "261" + endingLineNumber = "261" + offsetFromSymbolStart = "610"> + </Location> + <Location + uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0108" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "verify" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "261" + endingLineNumber = "261" + offsetFromSymbolStart = "621"> + 
</Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "3083" + endingLineNumber = "3083" + landmarkName = "test(aa, pp, mm)" + landmarkType = "9"> + <Locations> + <Location + uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E - a07591907a67b234" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "test" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "3010" + endingLineNumber = "3010" + offsetFromSymbolStart = "196"> + </Location> + <Location + uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E - a07591907a67bb4f" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "test" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "3083" + endingLineNumber = "3083" + offsetFromSymbolStart = "196"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + 
endingColumnNumber = "9223372036854775807" + startingLineNumber = "3081" + endingLineNumber = "3081" + landmarkName = "test(aa, pp, mm)" + landmarkType = "9"> + <Locations> + <Location + uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D - a07591907a67b5fa" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "test" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "3008" + endingLineNumber = "3008" + offsetFromSymbolStart = "151"> + </Location> + <Location + uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D - a07591907a67bb0d" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "test" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "3081" + endingLineNumber = "3081" + offsetFromSymbolStart = "151"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "A8352521-4868-49CB-8637-0BE665EE874C" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2917" + endingLineNumber = "2917" + landmarkName = "BN_mod_exp_mont(rr, a, p, m, ctx, in_mont)" + landmarkType = "9"> + <Locations> + <Location + uuid = "A8352521-4868-49CB-8637-0BE665EE874C - 16f55520f2fdcffa" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = 
"BN_mod_exp_mont" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2845" + endingLineNumber = "2845" + offsetFromSymbolStart = "2068"> + </Location> + <Location + uuid = "A8352521-4868-49CB-8637-0BE665EE874C - 16f55520f2fc3913" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "BN_mod_exp_mont" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2918" + endingLineNumber = "2918" + offsetFromSymbolStart = "2068"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "0C98EE14-2401-420F-8D20-F49ED0DBC41B" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "332" + endingLineNumber = "332" + landmarkName = "BN_MONT_CTX_new()" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2594" + endingLineNumber = "2594" + landmarkName = "BN_MONT_CTX_set(mont, mod, ctx)" + landmarkType = "9"> + <Locations> + 
<Location + uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B - 1aaa91d4f9a1e20c" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "BN_MONT_CTX_set" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2521" + endingLineNumber = "2521" + offsetFromSymbolStart = "20"> + </Location> + <Location + uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B - 1aaa91d4f9a1e965" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + symbolName = "BN_MONT_CTX_set" + moduleName = "hello" + usesParentBreakpointCondition = "Yes" + urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2594" + endingLineNumber = "2594" + offsetFromSymbolStart = "37"> + </Location> + </Locations> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "4C95F997-80D7-4841-B985-AC5D4BB9A5EF" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "2485" + endingLineNumber = "2485" + landmarkName = "BN_mod_inverse(in, a, n, ctx)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "CC868260-8B6B-4BA4-AC45-233E3DE02D02" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + 
endingColumnNumber = "9223372036854775807" + startingLineNumber = "2442" + endingLineNumber = "2442" + landmarkName = "int_bn_mod_inverse(in, a, n, ctx, pnoinv)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "49F72DAD-C98D-426E-9106-693BED1DC57C" + shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1080" + endingLineNumber = "1080" + landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "1DB7672F-C154-4530-8338-1D40045A83BB" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1099" + endingLineNumber = "1099" + landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "92922D1E-77A7-417A-A75F-7C60A90557C3" + shouldBeEnabled = "Yes" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1110" + endingLineNumber = "1110" + landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + <BreakpointProxy + BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint"> + <BreakpointContent + uuid = "30F57658-A9E6-4742-B54A-BC31A7EEC6B1" 
+ shouldBeEnabled = "No" + ignoreCount = "0" + continueAfterRunningActions = "No" + filePath = "openssl-test.c" + startingColumnNumber = "9223372036854775807" + endingColumnNumber = "9223372036854775807" + startingLineNumber = "1040" + endingLineNumber = "1040" + landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)" + landmarkType = "9"> + </BreakpointContent> + </BreakpointProxy> + </Breakpoints> +</Bucket> diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>SchemeUserState</key> + <dict> + <key>lib-gpu-verify.xcscheme_^#shared#^_</key> + <dict> + <key>orderHint</key> + <integer>0</integer> + </dict> + </dict> + <key>SuppressBuildableAutocreation</key> + <dict> + <key>466E0F5E0C932E1A00ED01DB</key> + <dict> + <key>primary</key> + <true/> + </dict> + </dict> +</dict> +</plist> diff --git a/xcode/modexp.cl b/xcode/modexp.cl @@ -0,0 +1,5 @@ +__kernel void verify() { + +} + + diff --git a/xcode/rsa-kernel.cl b/xcode/rsa-kernel.cl @@ -0,0 +1,9 @@ + + +__kernel void square(__global float* input, __global float* output, const unsigned int count) +{ + int i = get_global_id(0); + + if(i < count) + output[i] = input[i] * input[i]; +}