initial commit - libgpuverify - Signature verification on GPUs (WiP)

commit ceca94318f8347d9141f1e833f1d7b0445430643
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date:   Wed, 18 Oct 2023 13:47:36 +0200

initial commit

Diffstat:
A .DS_Store  | 0 
A .gitignore  | 5 +++++
A openssl/CMakeLists.txt  | 28 ++++++++++++++++++++++++++++
A openssl/main.c  | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A openssl/openssl-test.c  | 3087 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A openssl/openssl-test.h  | 531 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A openssl/readme.txt  | 25 +++++++++++++++++++++++++
A other/.DS_Store  | 0 
A other/CMakeLists.txt  | 16 ++++++++++++++++
A other/Main.c  | 20 ++++++++++++++++++++
A other/readme.txt  | 32 ++++++++++++++++++++++++++++++++
A source/.DS_Store  | 0 
A source/big-int-test.c  | 1080 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/big-int-test.h  | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/lib-gpu-verify.c  | 589 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/opencl-test.c  | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/opencl-test.h  | 25 +++++++++++++++++++++++++
A source/rsa-test.c  | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A source/rsa-test.h  | 27 +++++++++++++++++++++++++++
A xcode/.DS_Store  | 0 
A xcode/ReadMe.txt  | 2 ++
A xcode/lib-gpu-verify.xcodeproj/project.pbxproj  | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata  | 7 +++++++
A xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist  | 8 ++++++++
A xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate  | 0 
A xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme  | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist  | 684 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist  | 22 ++++++++++++++++++++++
A xcode/modexp.cl  | 5 +++++
A xcode/rsa-kernel.cl  | 9 +++++++++

30 files changed, 7316 insertions(+), 0 deletions(-)
diff --git a/.DS_Store b/.DS_Store
Binary files differ.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+# ignore build in openssl
+openssl/build
+
+# ignore in other
+other/build
diff --git a/openssl/CMakeLists.txt b/openssl/CMakeLists.txt
@@ -0,0 +1,27 @@
+cmake_minimum_required(VERSION 3.1) # 3.1 << C_STANDARD 11
+
+# NOTE(review): makes try_compile() checks build static libraries instead of
+# executables; presumably a toolchain workaround -- confirm it is still needed.
+set(CMAKE_TRY_COMPILE_TARGET_TYPE "STATIC_LIBRARY")
+
+project(openssltest LANGUAGES C)
+
+set(SOURCES
+    ./openssl-test.h
+    ./openssl-test.c
+)
+
+add_executable(${PROJECT_NAME} main.c ${SOURCES})
+
+# paths for ubuntu, they might be different on other systems.
+
+# Only header locations belong here; /usr/lib is a library directory, not an
+# include path, so it is no longer listed.
+target_include_directories(${PROJECT_NAME} PRIVATE /usr/include)
+
+# libgcrypt.so.20 is a shared object, so it is imported as SHARED (not STATIC).
+add_library(libgcrypt SHARED IMPORTED)
+set_target_properties(libgcrypt PROPERTIES IMPORTED_LOCATION /usr/lib/x86_64-linux-gnu/libgcrypt.so.20)
+target_link_libraries(${PROJECT_NAME} libgcrypt)
+
+set_target_properties(${PROJECT_NAME} PROPERTIES C_STANDARD 11
+                                                 C_STANDARD_REQUIRED ON
+                                                 C_EXTENSIONS OFF)
+
+# No extra compile definitions are set at the moment.
+target_compile_definitions(${PROJECT_NAME} PUBLIC)
+\ No newline at end of file
diff --git a/openssl/main.c b/openssl/main.c
@@ -0,0 +1,92 @@
+
+#include "openssl-test.h"
+
+#include <gcrypt.h>
+
+
+#define NEED_LIBGCRYPT_VERSION "1.10.2"
+/*
+ * Generates a fresh 2048-bit RSA key with libgcrypt, signs a fixed test value
+ * without padding, prints n, e, d and the signature into hex strings and hands
+ * e, d and n to test().
+ *
+ * Returns 0 on success, 2 if the installed libgcrypt is too old and 1 if any
+ * libgcrypt call or allocation fails. All S-expressions, MPIs and buffers are
+ * released before returning.
+ */
+int main(int argc, char** argv)
+{
+    const char *template = "(genkey(rsa(nbits 4:2048)))";
+    const char *dataformat = "(data (flags raw) (value %m))";
+    /* the number we want to sign, as a hex string */
+    const char *val = "1234567890ABCDEF";
+    /* may be a lot more than needed - the buffers hold the numbers in HEX string form */
+    const size_t buflen = 2048;
+
+    int rc = 1;
+    gcry_error_t err = 0;
+
+    gcry_sexp_t parms = NULL;
+    gcry_sexp_t key = NULL;       /* will contain key material */
+    gcry_sexp_t toSign = NULL;
+    gcry_sexp_t resSign = NULL;
+
+    gcry_mpi_t mpi = NULL;
+    gcry_mpi_t n_mpi = NULL;
+    gcry_mpi_t e_mpi = NULL;
+    gcry_mpi_t d_mpi = NULL;
+    gcry_mpi_t sig_mpi = NULL;
+
+    unsigned char *n = NULL;
+    unsigned char *e = NULL;
+    unsigned char *d = NULL;
+    unsigned char *sgn = NULL;
+
+    size_t scanned = 0;
+    size_t errOff = 0;
+    size_t nL = 0;
+
+    (void)argc;
+    (void)argv;
+
+    // disable any optimisations (must happen before gcry_check_version)
+    gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL);
+
+    /* Version check should be the very first call because it
+     makes sure that important subsystems are initialized.
+     #define NEED_LIBGCRYPT_VERSION to the minimum required version. */
+    if (!gcry_check_version (NEED_LIBGCRYPT_VERSION))
+    {
+        fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n",
+                 NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL));
+        return 2;
+    }
+    /* Disable secure memory.  */
+    gcry_control (GCRYCTL_DISABLE_SECMEM, 0);
+    /* Tell Libgcrypt that initialization has completed. */
+    gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);
+
+    err = gcry_sexp_new(&parms, template, strlen(template), 1);
+    if (err)
+        goto cleanup;
+
+    err = gcry_pk_genkey(&key, parms);
+    if (err)
+        goto cleanup;
+
+    /* gcry_mpi_scan allocates the MPI itself, so no gcry_mpi_new beforehand */
+    err = gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned);
+    if (err)
+        goto cleanup;
+
+    err = gcry_sexp_build(&toSign, &errOff, dataformat, mpi);
+    if (err)
+        goto cleanup;
+
+    // -- sign -- (raw flag: should not use padding)
+    err = gcry_pk_sign(&resSign, toSign, key);
+    if (err)
+        goto cleanup;
+
+    err = gcry_sexp_extract_param(key, NULL, "n e d", &n_mpi, &e_mpi, &d_mpi, NULL);
+    if (err)
+        goto cleanup;
+
+    err = gcry_sexp_extract_param(resSign, NULL, "s", &sig_mpi, NULL);
+    if (err)
+        goto cleanup;
+
+    n = malloc(buflen);
+    e = malloc(buflen);
+    d = malloc(buflen);
+    sgn = malloc(buflen);
+    if (n == NULL || e == NULL || d == NULL || sgn == NULL) {
+        fprintf (stderr, "out of memory\n");
+        goto cleanup;
+    }
+
+    err = gcry_mpi_print(GCRYMPI_FMT_HEX, n, buflen, &nL, n_mpi);
+    if (err)
+        goto cleanup;
+    err = gcry_mpi_print(GCRYMPI_FMT_HEX, e, buflen, &nL, e_mpi);
+    if (err)
+        goto cleanup;
+    err = gcry_mpi_print(GCRYMPI_FMT_HEX, d, buflen, &nL, d_mpi);
+    if (err)
+        goto cleanup;
+    err = gcry_mpi_print(GCRYMPI_FMT_HEX, sgn, buflen, &nL, sig_mpi);
+    if (err)
+        goto cleanup;
+
+    // every time the program is run the generated inputs change, keep this in mind when debugging.
+    test(e,d,n);
+
+    rc = 0;
+
+cleanup:
+    if (err)
+        fprintf (stderr, "libgcrypt failure: %s\n", gcry_strerror (err));
+
+    free(sgn);
+    free(d);
+    free(e);
+    free(n);
+
+    gcry_mpi_release(sig_mpi);
+    gcry_mpi_release(d_mpi);
+    gcry_mpi_release(e_mpi);
+    gcry_mpi_release(n_mpi);
+    gcry_mpi_release(mpi);
+
+    gcry_sexp_release(resSign);
+    gcry_sexp_release(toSign);
+    gcry_sexp_release(key);
+    gcry_sexp_release(parms);
+
+    return rc;
+}
+\ No newline at end of file
diff --git a/openssl/openssl-test.c b/openssl/openssl-test.c
@@ -0,0 +1,3087 @@
+//
+//  openssl-test.c
+//  hello
+//
+//  Created by Cedric Zwahlen on 07.10.2023.
+//
+
+#include "openssl-test.h"
+
+#include <stdlib.h>
+#include <string.h>
+/*
+ * Global ex_data bookkeeping: a lock pointer plus an array of EX_CALLBACKS
+ * with CRYPTO_EX_INDEX__COUNT entries (presumably one per ex_data class).
+ */
+typedef struct ossl_ex_data_global_st {
+    CRYPTO_RWLOCK *ex_data_lock;
+    EX_CALLBACKS ex_data[CRYPTO_EX_INDEX__COUNT];
+} OSSL_EX_DATA_GLOBAL;
+
+
+
+/*
+ * Library-context record. Apart from the two locks, the embedded ex_data
+ * globals and the ischild bit, every active member is an opaque void pointer.
+ * NOTE(review): looks like a trimmed copy of OpenSSL's internal library
+ * context layout -- confirm against the OpenSSL version it was taken from.
+ */
+struct ossl_lib_ctx_st {
+    CRYPTO_RWLOCK *lock, *rand_crngt_lock;
+    OSSL_EX_DATA_GLOBAL global;
+
+    void *property_string_data;
+    void *evp_method_store;
+    void *provider_store;
+    void *namemap;
+    void *property_defns;
+    void *global_properties;
+    void *drbg;
+    void *drbg_nonce;
+/* The non-FIPS members below are disabled in this port:
+#ifndef FIPS_MODULE
+    void *provider_conf;
+    void *bio_core;
+    void *child_provider;
+    OSSL_METHOD_STORE *decoder_store;
+    void *decoder_cache;
+    OSSL_METHOD_STORE *encoder_store;
+    OSSL_METHOD_STORE *store_loader_store;
+    void *self_test_cb;
+#endif*/
+#if defined(OPENSSL_THREADS)
+    void *threads;
+#endif
+    void *rand_crngt;
+#ifdef FIPS_MODULE
+    void *thread_event_handler;
+    void *fips_prov;
+#endif
+
+    unsigned int ischild:1;     /* single-bit flag */
+};
+
+
+
+
+
+
+
+/*
+typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                           int idx, long argl, void *argp);
+typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                             int idx, long argl, void *argp);
+typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from,
+                           void *from_d, int idx, long argl, void *argp);
+__owur int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp,
+                            CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func,
+                            CRYPTO_EX_free *free_func);
+*/
+/*
+ * ex_data container. The typed stack from the original declaration is
+ * replaced by an untyped pointer in this port.
+ */
+struct crypto_ex_data_st {
+    //STACK_OF(void) *sk;
+    void * sk;
+};
+//DEFINE_STACK_OF(void)
+/*
+ * Montgomery reduction context: the modulus N together with the precomputed
+ * values (RR, Ni, n0) and the bit count of R used for Montgomery arithmetic.
+ */
+typedef struct bn_mont_ctx_st BN_MONT_CTX;
+struct bn_mont_ctx_st {
+    int ri;                     /* number of bits in R */
+    BIGNUM RR;                  /* used to convert to montgomery form,
+                                   possibly zero-padded */
+    BIGNUM N;                   /* The modulus */
+    BIGNUM Ni;                  /* R*(1/R mod N) - N*Ni = 1 (Ni is only
+                                 * stored for bignum algorithm) */
+    BN_ULONG n0[2];             /* least significant word(s) of Ni; (type
+                                 * changed with 0.9.9, was "BN_ULONG n0;"
+                                 * before) */
+    int flags;                  /* BN_MONT_CTX_new() stores BN_FLG_MALLOCED here */
+};
+
+
+/* Return those bits of the mask |n| that are set in b->flags (0 if none). */
+int BN_get_flags(const BIGNUM *b, int n)
+{
+    const int selected = n & b->flags;
+
+    return selected;
+}
+
+/* Return 1 when |a| has no words in use (top == 0), otherwise 0. */
+int BN_is_zero(const BIGNUM *a)
+{
+    if (a->top != 0)
+        return 0;
+    return 1;
+}
+/*
+ * Return the number of significant bits in the word l (0 when l == 0).
+ *
+ * The result is built without branching on the value of l: each stage takes
+ * the upper part of the remaining word (x), turns "x != 0" into an all-ones
+ * or all-zero mask, conditionally adds the stage width to bits and, when the
+ * mask is set, replaces l by x before the next, narrower stage.
+ */
+int BN_num_bits_word(BN_ULONG l)
+{
+    BN_ULONG x, mask;
+    int bits = (l != 0);
+
+#if BN_BITS2 > 32
+    x = l >> 32;
+    mask = (0 - x) & BN_MASK2;              /* top bit set iff x != 0 */
+    mask = (0 - (mask >> (BN_BITS2 - 1)));  /* spread that bit to all bits */
+    bits += 32 & mask;
+    l ^= (x ^ l) & mask;                    /* l = x when mask is set */
+#endif
+
+    x = l >> 16;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 16 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 8;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 8 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 4;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 4 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 2;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 2 & mask;
+    l ^= (x ^ l) & mask;
+
+    x = l >> 1;
+    mask = (0 - x) & BN_MASK2;
+    mask = (0 - (mask >> (BN_BITS2 - 1)));
+    bits += 1 & mask;
+
+    return bits;
+}
+
+/*
+ * Return the bit length of |a|: BN_BITS2 for every word below the top one
+ * plus the significant bits of the top word. A zero value has length 0.
+ */
+int BN_num_bits(const BIGNUM *a)
+{
+    const int last = a->top - 1;
+
+    bn_check_top(a);
+
+    if (BN_is_zero(a)) {
+        return 0;
+    }
+
+    return BN_num_bits_word(a->d[last]) + last * BN_BITS2;
+}
+
+/*
+ * Release |str| with free(). The |file| and |line| arguments are accepted for
+ * interface compatibility only; the pluggable free hook of the original is
+ * not part of this port.
+ */
+void CRYPTO_free(void *str, const char *file, int line)
+{
+    (void)file;
+    (void)line;
+
+    free(str);
+}
+
+/*
+ * Overwrite the first |num| bytes of |str| with zeros and then release the
+ * buffer through CRYPTO_free(). A NULL |str| is ignored.
+ *
+ * The original port left the cleanse step empty, so buffers were freed with
+ * their contents intact. The bytes are written through a volatile pointer so
+ * the compiler cannot drop the stores as dead writes before the free.
+ */
+void CRYPTO_clear_free(void *str, size_t num, const char *file, int line)
+{
+    if (str == NULL)
+        return;
+    if (num) {
+        volatile unsigned char *bytes = str;
+        size_t i;
+
+        for (i = 0; i < num; i++)
+            bytes[i] = 0;
+    }
+    CRYPTO_free(str, file, line);
+}
+
+/*
+ * Allocate a zeroed word array of |words| entries and copy the b->top words
+ * currently used by |b| into it. |b| itself is not modified.
+ *
+ * Returns the new array, or NULL if |words| is too large or the allocation
+ * fails.
+ *
+ * The secure heap is not part of this port. Previously a bignum carrying
+ * BN_FLG_SECURE got no allocation at all and every expansion failed; it now
+ * falls back to the regular heap, matching bn_free_d() which always releases
+ * with OPENSSL_free().
+ */
+static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words)
+{
+    BN_ULONG *a = NULL;
+
+    if (words > (INT_MAX / (4 * BN_BITS2))) {
+       // ERR_raise(ERR_LIB_BN, BN_R_BIGNUM_TOO_LONG);
+        assert(0);
+        return NULL;
+    }
+
+    a = OPENSSL_zalloc(words * sizeof(*a));
+    if (a == NULL)
+        return NULL;
+
+    assert(b->top <= words);
+    if (b->top > 0)
+        memcpy(a, b->d, sizeof(*a) * b->top);
+
+    return a;
+}
+
+/*
+ * Release the word array of |a|. The |clear| argument is accepted but has no
+ * effect in this port: the array is always released with OPENSSL_free().
+ */
+static void bn_free_d(BIGNUM *a, int clear)
+{
+    (void)clear;
+
+    OPENSSL_free(a->d);
+}
+
+
+/*
+ * Make sure |b| can hold at least |words| words, reallocating the word array
+ * when it is currently smaller. Returns |b|, or NULL if the new array could
+ * not be obtained (in which case |b| is left untouched).
+ */
+BIGNUM *bn_expand2(BIGNUM *b, int words)
+{
+    BN_ULONG *grown;
+
+    if (words <= b->dmax)
+        return b;
+
+    grown = bn_expand_internal(b, words);
+    if (grown == NULL)
+        return NULL;
+
+    if (b->d != NULL)
+        bn_free_d(b, 1);
+
+    b->dmax = words;
+    b->d = grown;
+    return b;
+}
+
+/*
+ * Make sure |a| has room for |bits| bits. Returns |a| on success and NULL if
+ * |bits| is too large to round up to whole words or the expansion fails.
+ */
+static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
+{
+    int needed;
+
+    if (bits > (INT_MAX - BN_BITS2 + 1))
+        return NULL;
+
+    /* round up to whole words */
+    needed = (bits + BN_BITS2 - 1) / BN_BITS2;
+
+    return (needed <= a->dmax) ? a : bn_expand2(a, needed);
+}
+
+/*
+ * Set |a| to the non-negative single-word value |w|.
+ * Returns 1 on success, 0 if room for one word could not be obtained.
+ */
+int BN_set_word(BIGNUM *a, BN_ULONG w)
+{
+    const int word_bits = (int)sizeof(BN_ULONG) * 8;
+
+    bn_check_top(a);
+
+    if (bn_expand(a, word_bits) == NULL) {
+        return 0;
+    }
+
+    a->d[0] = w;
+    a->neg = 0;
+    if (w)
+        a->top = 1;
+    else
+        a->top = 0;     /* zero is represented with no words in use */
+    a->flags &= ~BN_FLG_FIXED_TOP;
+
+    bn_check_top(a);
+    return 1;
+}
+
+
+/*
+ * Return 1 when the magnitude of |a| equals the word |w|, otherwise 0.
+ * The sign of |a| is not looked at.
+ */
+int BN_abs_is_word(const BIGNUM *a, const BN_ULONG w)
+{
+    if (a->top == 0)
+        return w == 0;
+    if (a->top == 1)
+        return a->d[0] == w;
+    return 0;
+}
+
+
+
+
+
+/*
+ * Open a new frame on |ctx|. When the context is already in an error state,
+ * or the frame cannot be pushed, only the err_stack counter is incremented.
+ */
+void BN_CTX_start(BN_CTX *ctx)
+{
+    /* If we're already overflowing, just count the extra start */
+    if (ctx->err_stack || ctx->too_many) {
+        ctx->err_stack++;
+        return;
+    }
+
+    /* (Try to) record the current usage as the new frame pointer */
+    if (BN_STACK_push(&ctx->stack, ctx->used))
+        return;
+
+    /* stands in for ERR_raise(ERR_LIB_BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES) */
+    assert(0);
+    ctx->err_stack++;
+}
+
+/*
+ * Hand out the next temporary BIGNUM from the pool of |ctx|, set to zero and
+ * with BN_FLG_CONSTTIME cleared. Returns NULL if the context is in an error
+ * state or the pool cannot supply another bignum.
+ */
+BIGNUM *BN_CTX_get(BN_CTX *ctx)
+{
+    BIGNUM *bn;
+
+    if (ctx->err_stack || ctx->too_many) {
+        return NULL;
+    }
+
+    bn = BN_POOL_get(&ctx->pool, ctx->flags);
+    if (bn == NULL) {
+        /*
+         * Remember the failure so that repeated "get" attempts fail fast
+         * until BN_CTX_end() resets the marker.
+         */
+        ctx->too_many = 1;
+        /* stands in for ERR_raise(ERR_LIB_BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES) */
+        assert(0);
+        return NULL;
+    }
+
+    /* OK, make sure the returned bignum is "zero" */
+    BN_zero(bn);
+    /* clear BN_FLG_CONSTTIME if leaked from previous frames */
+    bn->flags &= (~BN_FLG_CONSTTIME);
+    ctx->used++;
+    return bn;
+}
+
+/* Reset |a| to an all-zero BIGNUM by copying a zero-initialised static one. */
+void bn_init(BIGNUM *a)
+{
+    static BIGNUM blank;    /* static storage: every member starts out zero */
+
+    *a = blank;
+    bn_check_top(a);
+}
+
+/* Put |ctx| into its empty state: zero counters and flags, blank bignums. */
+void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
+{
+    ctx->flags = 0;
+    ctx->ri = 0;
+    ctx->n0[1] = 0;
+    ctx->n0[0] = 0;
+
+    bn_init(&ctx->RR);
+    bn_init(&ctx->N);
+    bn_init(&ctx->Ni);
+}
+
+/*
+ * Allocate and initialise a Montgomery context. The result is marked with
+ * BN_FLG_MALLOCED. Returns NULL if the allocation fails.
+ */
+BN_MONT_CTX *BN_MONT_CTX_new(void)
+{
+    BN_MONT_CTX *mont = OPENSSL_malloc(sizeof(*mont));
+
+    if (mont == NULL) {
+        return NULL;
+    }
+
+    BN_MONT_CTX_init(mont);
+    mont->flags = BN_FLG_MALLOCED;
+    return mont;
+}
+
+/*
+ * Compare the magnitudes of |a| and |b|, ignoring their signs.
+ * When the word counts differ the result is a->top - b->top; otherwise the
+ * words are compared from the most significant one down and the result is
+ * 1, -1 or 0.
+ */
+int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
+{
+    const BN_ULONG *lhs, *rhs;
+    int idx, delta;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    delta = a->top - b->top;
+    if (delta != 0) {
+        return delta;
+    }
+
+    lhs = a->d;
+    rhs = b->d;
+    idx = a->top;
+    while (idx-- > 0) {
+        if (lhs[idx] != rhs[idx]) {
+            return (lhs[idx] > rhs[idx]) ? 1 : -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Make sure |a| can hold |words| words. Returns |a|, or NULL when the array
+ * had to grow and bn_expand2() failed.
+ */
+BIGNUM *bn_wexpand(BIGNUM *a, int words)
+{
+    if (words > a->dmax) {
+        return bn_expand2(a, words);
+    }
+    return a;
+}
+/*
+ * Word-wise subtraction r[i] = a[i] - b[i] for i in [0, n), carrying the
+ * borrow from one word to the next. Returns the borrow left after the last
+ * word, or 0 when n <= 0 (a negative n also trips the assert).
+ *
+ * Unless OPENSSL_SMALL_FOOTPRINT is defined, the first loop handles four
+ * words per iteration; the second loop handles the remaining words.
+ */
+BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      int n)
+{
+    BN_ULONG t1, t2;
+    int c = 0;                              /* running borrow */
+
+    assert(n >= 0);
+    if (n <= 0)
+        return (BN_ULONG)0;
+
+#ifndef OPENSSL_SMALL_FOOTPRINT
+    while (n & ~3) {
+        t1 = a[0];
+        t2 = (t1 - c) & BN_MASK2;           /* apply the incoming borrow */
+        c  = (t2 > t1);                     /* wrapped => borrow again */
+        t1 = b[0];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[0] = t1;
+        c += (t1 > t2);                     /* wrapped while subtracting b */
+        t1 = a[1];
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[1];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[1] = t1;
+        c += (t1 > t2);
+        t1 = a[2];
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[2];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[2] = t1;
+        c += (t1 > t2);
+        t1 = a[3];
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[3];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[3] = t1;
+        c += (t1 > t2);
+        a += 4;
+        b += 4;
+        r += 4;
+        n -= 4;
+    }
+#endif
+    while (n) {
+        t1 = a[0];
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[0];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[0] = t1;
+        c += (t1 > t2);
+        a++;
+        b++;
+        r++;
+        n--;
+    }
+    return c;
+}
+
+/*
+ * Unsigned subtraction: r = a - b computed on the word arrays only; the sign
+ * fields of a and b are not read. Requires a->top >= b->top, otherwise the
+ * assert fires and 0 is returned. Returns 1 on success and 0 if r could not
+ * be expanded. On success r->neg is 0 and r->top excludes leading zero words.
+ * NOTE(review): a borrow left over after the last word is not reported, so
+ * the caller presumably guarantees that the magnitude of a is >= that of b.
+ */
+int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int max, min, dif;
+    BN_ULONG t1, t2, borrow, *rp;
+    const BN_ULONG *ap, *bp;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    max = a->top;
+    min = b->top;
+    dif = max - min;
+
+    if (dif < 0) {              /* hmm... should not be happening */
+        //ERR_raise(ERR_LIB_BN, BN_R_ARG2_LT_ARG3);
+        assert(0);
+        return 0;
+    }
+
+    if (bn_wexpand(r, max) == NULL)
+        return 0;
+
+    ap = a->d;
+    bp = b->d;
+    rp = r->d;
+
+    borrow = bn_sub_words(rp, ap, bp, min); /* words both operands have */
+    ap += min;
+    rp += min;
+
+    while (dif) {               /* remaining high words of a */
+        dif--;
+        t1 = *(ap++);
+        t2 = (t1 - borrow) & BN_MASK2;
+        *(rp++) = t2;
+        borrow &= (t1 == 0);    /* the borrow only survives a zero word */
+    }
+
+    while (max && *--rp == 0)   /* drop leading zero words */
+        max--;
+
+    r->top = max;
+    r->neg = 0;
+    bn_pollute(r);
+
+    return 1;
+}
+/*
+ * Word-wise addition r[i] = a[i] + b[i] for i in [0, n), carrying from one
+ * word to the next. Returns the carry out of the last word, or 0 when
+ * n <= 0 (a negative n also trips the assert).
+ *
+ * Two implementations: with BN_LLONG the sum is accumulated in a BN_ULLONG
+ * and the carry is what remains after shifting right by BN_BITS2; without it
+ * the carry is detected by comparing each sum with its operand. In both,
+ * unless OPENSSL_SMALL_FOOTPRINT is defined, the first loop handles four
+ * words per iteration and the second loop the remaining words.
+ */
+#ifdef BN_LLONG
+BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      int n)
+{
+    BN_ULLONG ll = 0;
+
+    assert(n >= 0);
+    if (n <= 0)
+        return (BN_ULONG)0;
+
+# ifndef OPENSSL_SMALL_FOOTPRINT
+    while (n & ~3) {
+        ll += (BN_ULLONG) a[0] + b[0];
+        r[0] = (BN_ULONG)ll & BN_MASK2;
+        ll >>= BN_BITS2;                    /* keep only the carry */
+        ll += (BN_ULLONG) a[1] + b[1];
+        r[1] = (BN_ULONG)ll & BN_MASK2;
+        ll >>= BN_BITS2;
+        ll += (BN_ULLONG) a[2] + b[2];
+        r[2] = (BN_ULONG)ll & BN_MASK2;
+        ll >>= BN_BITS2;
+        ll += (BN_ULLONG) a[3] + b[3];
+        r[3] = (BN_ULONG)ll & BN_MASK2;
+        ll >>= BN_BITS2;
+        a += 4;
+        b += 4;
+        r += 4;
+        n -= 4;
+    }
+# endif
+    while (n) {
+        ll += (BN_ULLONG) a[0] + b[0];
+        r[0] = (BN_ULONG)ll & BN_MASK2;
+        ll >>= BN_BITS2;
+        a++;
+        b++;
+        r++;
+        n--;
+    }
+    return (BN_ULONG)ll;
+}
+#else                           /* !BN_LLONG */
+BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
+                      int n)
+{
+    BN_ULONG c, l, t;
+
+    assert(n >= 0);
+    if (n <= 0)
+        return (BN_ULONG)0;
+
+    c = 0;
+# ifndef OPENSSL_SMALL_FOOTPRINT
+    while (n & ~3) {
+        t = a[0];
+        t = (t + c) & BN_MASK2;             /* add the incoming carry */
+        c = (t < c);                        /* wrapped => carry */
+        l = (t + b[0]) & BN_MASK2;
+        c += (l < t);                       /* wrapped while adding b */
+        r[0] = l;
+        t = a[1];
+        t = (t + c) & BN_MASK2;
+        c = (t < c);
+        l = (t + b[1]) & BN_MASK2;
+        c += (l < t);
+        r[1] = l;
+        t = a[2];
+        t = (t + c) & BN_MASK2;
+        c = (t < c);
+        l = (t + b[2]) & BN_MASK2;
+        c += (l < t);
+        r[2] = l;
+        t = a[3];
+        t = (t + c) & BN_MASK2;
+        c = (t < c);
+        l = (t + b[3]) & BN_MASK2;
+        c += (l < t);
+        r[3] = l;
+        a += 4;
+        b += 4;
+        r += 4;
+        n -= 4;
+    }
+# endif
+    while (n) {
+        t = a[0];
+        t = (t + c) & BN_MASK2;
+        c = (t < c);
+        l = (t + b[0]) & BN_MASK2;
+        c += (l < t);
+        r[0] = l;
+        a++;
+        b++;
+        r++;
+        n--;
+    }
+    return (BN_ULONG)c;
+}
+#endif                          /* !BN_LLONG */
+
+/*
+ * Unsigned add of b to a; r can be equal to a or b.
+ *
+ * Only the word arrays are added; the sign fields of a and b are not read
+ * and r->neg is set to 0. The operands are swapped locally so that a is the
+ * one with more words. Returns 1 on success, 0 if r could not be expanded
+ * to max + 1 words.
+ */
+int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int max, min, dif;
+    const BN_ULONG *ap, *bp;
+    BN_ULONG *rp, carry, t1, t2;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->top < b->top) {
+        const BIGNUM *tmp;
+
+        tmp = a;
+        a = b;
+        b = tmp;
+    }
+    max = a->top;
+    min = b->top;
+    dif = max - min;
+
+    if (bn_wexpand(r, max + 1) == NULL)     /* one spare word for the carry */
+        return 0;
+
+    r->top = max;
+
+    ap = a->d;
+    bp = b->d;
+    rp = r->d;
+
+    carry = bn_add_words(rp, ap, bp, min);  /* words both operands have */
+    rp += min;
+    ap += min;
+
+    while (dif) {                           /* remaining high words of a */
+        dif--;
+        t1 = *(ap++);
+        t2 = (t1 + carry) & BN_MASK2;
+        *(rp++) = t2;
+        carry &= (t2 == 0);                 /* carry survives only a wrap to 0 */
+    }
+    *rp = carry;
+    r->top += carry;
+
+    r->neg = 0;
+    bn_check_top(r);
+    return 1;
+}
+
+/*
+ * Signed subtraction r = a - b, built from the unsigned helpers.
+ * Returns whatever the helper returned (1 on success, 0 on failure); when the
+ * operands are equal r is set to zero and 1 is returned.
+ */
+int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int ok, sign, order;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->neg != b->neg) {
+        /* opposite signs: the magnitudes add up and the sign follows a */
+        sign = a->neg;
+        ok = BN_uadd(r, a, b);
+    } else {
+        order = BN_ucmp(a, b);
+        if (order == 0) {
+            sign = 0;
+            BN_zero(r);
+            ok = 1;
+        } else if (order > 0) {
+            sign = a->neg;
+            ok = BN_usub(r, a, b);
+        } else {
+            /* |b| is larger: the result takes the opposite of b's sign */
+            sign = !b->neg;
+            ok = BN_usub(r, b, a);
+        }
+    }
+
+    r->neg = sign;
+    bn_check_top(r);
+    return ok;
+}
+
+/*
+ * Signed addition r = a + b, built from the unsigned helpers.
+ * Returns whatever the helper returned (1 on success, 0 on failure); when the
+ * operands cancel out r is set to zero and 1 is returned.
+ */
+int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
+{
+    int ok, sign, order;
+
+    bn_check_top(a);
+    bn_check_top(b);
+
+    if (a->neg == b->neg) {
+        /* same sign: the magnitudes add up and the sign is kept */
+        sign = a->neg;
+        ok = BN_uadd(r, a, b);
+    } else {
+        order = BN_ucmp(a, b);
+        if (order == 0) {
+            sign = 0;
+            BN_zero(r);
+            ok = 1;
+        } else if (order > 0) {
+            sign = a->neg;
+            ok = BN_usub(r, a, b);
+        } else {
+            /* |b| is larger: the result takes b's sign */
+            sign = b->neg;
+            ok = BN_usub(r, b, a);
+        }
+    }
+
+    r->neg = sign;
+    bn_check_top(r);
+    return ok;
+}
+
+/*
+ * Like BN_mod, but the remainder stored in |r| is never negative
+ * (0 <= r < |d|). Returns 0 if BN_mod fails, otherwise 1 or the result of
+ * the correcting add/subtract.
+ */
+int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
+{
+    if (!(BN_mod(r, m, d, ctx))) {
+        return 0;
+    }
+
+    if (!r->neg) {
+        return 1;
+    }
+
+    /* here -|d| < r < 0, so add |d|: subtract d when d itself is negative */
+    if (d->neg) {
+        return BN_sub(r, r, d);
+    }
+    return BN_add(r, r, d);
+}
+
+/*
+ * Copy the value of |b| into |a|: words, sign and top, plus the
+ * BN_FLG_FIXED_TOP flag if |b| carries it. For a BN_FLG_CONSTTIME source all
+ * dmax words are copied instead of only the top used ones.
+ * Returns |a|, or NULL if |a| could not be expanded.
+ */
+BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b)
+{
+    int words;
+
+    bn_check_top(b);
+
+    if (a == b) {
+        return a;
+    }
+
+    if (BN_get_flags(b, BN_FLG_CONSTTIME))
+        words = b->dmax;
+    else
+        words = b->top;
+
+    if (bn_wexpand(a, words) == NULL) {
+        return NULL;
+    }
+
+    if (b->top > 0) {
+        memcpy(a->d, b->d, sizeof(b->d[0]) * words);
+    }
+
+    a->top = b->top;
+    a->neg = b->neg;
+    a->flags |= b->flags & BN_FLG_FIXED_TOP;
+    bn_check_top(a);
+    return a;
+}
+/*
+ * Shift |num| left in place until the most significant bit of its top word
+ * is set, and return the shift count (BN_BITS2 minus the bit length of the
+ * top word). num->top stays unchanged.
+ * Reads d[top - 1], so num->top must be > 0; a zero top word would give a
+ * shift count of BN_BITS2, so the top word is expected to be non-zero.
+ */
+static int bn_left_align(BIGNUM *num)
+{
+    BN_ULONG *d = num->d, n, m, rmask;
+    int top = num->top;
+    int rshift = BN_num_bits_word(d[top - 1]), lshift, i;
+
+    lshift = BN_BITS2 - rshift;
+    rshift %= BN_BITS2;            /* say no to undefined behaviour */
+    rmask = (BN_ULONG)0 - rshift;  /* rmask = 0 - (rshift != 0) */
+    rmask |= rmask >> 8;
+
+    for (i = 0, m = 0; i < top; i++) {
+        n = d[i];
+        d[i] = ((n << lshift) | m) & BN_MASK2;
+        m = (n >> rshift) & rmask;  /* bits carried into the next word */
+    }
+
+    return lshift;
+}
+
+/*
+ * Set r = a << n. The result always has a->top + n / BN_BITS2 + 1 words and
+ * is flagged BN_FLG_FIXED_TOP, i.e. its top word may be zero. The sign of a
+ * is copied. Returns 1 on success, 0 if r could not be expanded. n must not
+ * be negative.
+ *
+ * In respect to shift factor the execution time is invariant of
+ * |n % BN_BITS2|, but not |n / BN_BITS2|. Or in other words pre-condition
+ * for constant-time-ness is |n < BN_BITS2| or |n / BN_BITS2| being
+ * non-secret.
+ */
+int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, rmask = 0;
+
+    assert(n >= 0);
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    nw = n / BN_BITS2;             /* whole words to shift by */
+    if (bn_wexpand(r, a->top + nw + 1) == NULL)
+        return 0;
+
+    if (a->top != 0) {
+        lb = (unsigned int)n % BN_BITS2;
+        rb = BN_BITS2 - lb;
+        rb %= BN_BITS2;            /* say no to undefined behaviour */
+        rmask = (BN_ULONG)0 - rb;  /* rmask = 0 - (rb != 0) */
+        rmask |= rmask >> 8;
+        f = &(a->d[0]);
+        t = &(r->d[nw]);
+        l = f[a->top - 1];
+        t[a->top] = (l >> rb) & rmask;
+        for (i = a->top - 1; i > 0; i--) {  /* top down, so r may alias a */
+            m = l << lb;
+            l = f[i - 1];
+            t[i] = (m | ((l >> rb) & rmask)) & BN_MASK2;
+        }
+        t[0] = (l << lb) & BN_MASK2;
+    } else {
+        /* shouldn't happen, but formally required */
+        r->d[nw] = 0;
+    }
+    if (nw != 0)
+        memset(r->d, 0, sizeof(*t) * nw);   /* clear the vacated low words */
+
+    r->neg = a->neg;
+    r->top = a->top + nw + 1;
+    r->flags |= BN_FLG_FIXED_TOP;
+
+    return 1;
+}
+/*
+ * Divide the double word (h, l) by d and return the one-word quotient.
+ * With both BN_LLONG and BN_DIV2W defined the division is done directly in
+ * a BN_ULLONG; otherwise the quotient is produced half a word at a time.
+ */
+#if defined(BN_LLONG) && defined(BN_DIV2W)
+
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
+{
+    return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
+}
+
+#else
+
+/* Divide h,l by d and return the result; returns BN_MASK2 when d == 0. */
+/* I need to test this some more :-( */
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
+{
+    BN_ULONG dh, dl, q, ret = 0, th, tl, t;
+    int i, count = 2;               /* two rounds, one half word each */
+
+    if (d == 0)
+        return BN_MASK2;
+
+    i = BN_num_bits_word(d);
+    assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
+
+    i = BN_BITS2 - i;
+    if (h >= d)
+        h -= d;
+
+    if (i) {                        /* normalise: top bit of d becomes set */
+        d <<= i;
+        h = (h << i) | (l >> (BN_BITS2 - i));
+        l <<= i;
+    }
+    dh = (d & BN_MASK2h) >> BN_BITS4;
+    dl = (d & BN_MASK2l);
+    for (;;) {
+        if ((h >> BN_BITS4) == dh)  /* estimate the next quotient half word */
+            q = BN_MASK2l;
+        else
+            q = h / dh;
+
+        th = q * dh;
+        tl = dl * q;
+        for (;;) {                  /* lower the estimate while it is too big */
+            t = h - th;
+            if ((t & BN_MASK2h) ||
+                ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
+                break;
+            q--;
+            th -= dh;
+            tl -= dl;
+        }
+        t = (tl >> BN_BITS4);
+        tl = (tl << BN_BITS4) & BN_MASK2h;
+        th += t;
+
+        if (l < tl)
+            th++;
+        l -= tl;
+        if (h < th) {
+            h += d;
+            q--;
+        }
+        h -= th;
+
+        if (--count == 0)
+            break;
+
+        ret = q << BN_BITS4;        /* first round gives the upper half */
+        h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
+        l = (l & BN_MASK2l) << BN_BITS4;
+    }
+    ret |= q;
+    return ret;
+}
+#endif                          /* defined(BN_LLONG) && defined(BN_DIV2W) */
+
+
+
+/*
+ * Process num words of ap with the multiplier w, writing the results to rp,
+ * and return the final carry (0 when num <= 0; a negative num also trips the
+ * assert). w is split into low and high halves for the mul() macro.
+ * NOTE(review): mul() is defined outside this excerpt; presumably it stores
+ * the low word of ap[i] * w + carry in rp[i] and keeps the high word as the
+ * next carry -- confirm against the macro.
+ */
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
+{
+    BN_ULONG carry = 0;
+    BN_ULONG bl, bh;
+
+    assert(num >= 0);
+    if (num <= 0)
+        return (BN_ULONG)0;
+
+    bl = LBITS(w);
+    bh = HBITS(w);
+
+# ifndef OPENSSL_SMALL_FOOTPRINT
+    while (num & ~3) {              /* four words per iteration */
+        mul(rp[0], ap[0], bl, bh, carry);
+        mul(rp[1], ap[1], bl, bh, carry);
+        mul(rp[2], ap[2], bl, bh, carry);
+        mul(rp[3], ap[3], bl, bh, carry);
+        ap += 4;
+        rp += 4;
+        num -= 4;
+    }
+# endif
+    while (num) {                   /* remaining words */
+        mul(rp[0], ap[0], bl, bh, carry);
+        ap++;
+        rp++;
+        num--;
+    }
+    return carry;
+}
+
+/*
+ * Close the frame opened by the matching BN_CTX_start(): release the
+ * bignums handed out since then and clear the too_many marker. When the
+ * context is in an error state only the err_stack counter is decremented.
+ * A NULL |ctx| is ignored.
+ */
+void BN_CTX_end(BN_CTX *ctx)
+{
+    unsigned int frame;
+
+    if (ctx == NULL)
+        return;
+
+    if (ctx->err_stack) {
+        ctx->err_stack--;
+        return;
+    }
+
+    frame = BN_STACK_pop(&ctx->stack);
+    /* Does this stack frame have anything to release? */
+    if (frame < ctx->used)
+        BN_POOL_release(&ctx->pool, ctx->used - frame);
+    ctx->used = frame;
+    /* Unjam "too_many" in case "get" had failed */
+    ctx->too_many = 0;
+}
+/*
+ * Set r = a >> n. The result has a->top - n / BN_BITS2 words and is flagged
+ * BN_FLG_FIXED_TOP, i.e. its top word may be zero. The sign of a is copied.
+ * If the shift removes every word of a, r is set to zero instead. Returns 1
+ * on success, 0 if r could not be expanded. n must not be negative.
+ */
+int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int i, top, nw;
+    unsigned int lb, rb;
+    BN_ULONG *t, *f;
+    BN_ULONG l, m, mask;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    assert(n >= 0);
+
+    nw = n / BN_BITS2;         /* whole words to drop */
+    if (nw >= a->top) {
+        /* shouldn't happen, but formally required */
+        BN_zero(r);
+        return 1;
+    }
+
+    rb = (unsigned int)n % BN_BITS2;
+    lb = BN_BITS2 - rb;
+    lb %= BN_BITS2;            /* say no to undefined behaviour */
+    mask = (BN_ULONG)0 - lb;   /* mask = 0 - (lb != 0) */
+    mask |= mask >> 8;
+    top = a->top - nw;
+    if (r != a && bn_wexpand(r, top) == NULL)
+        return 0;
+
+    t = &(r->d[0]);
+    f = &(a->d[nw]);
+    l = f[0];
+    for (i = 0; i < top - 1; i++) {     /* bottom up, so r may alias a */
+        m = f[i + 1];
+        t[i] = (l >> rb) | ((m << lb) & mask);
+        l = m;
+    }
+    t[i] = l >> rb;
+
+    r->neg = a->neg;
+    r->top = top;
+    r->flags |= BN_FLG_FIXED_TOP;
+
+    return 1;
+}
+/*
+ * Process num words of ap with the multiplier w, accumulating into rp, and
+ * return the final carry (0 when num <= 0; a negative num also trips the
+ * assert). w is split into low and high halves for the mul_add() macro.
+ * NOTE(review): mul_add() is defined outside this excerpt; presumably it
+ * adds ap[i] * w plus the carry to rp[i] and keeps the overflow as the next
+ * carry -- confirm against the macro.
+ */
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
+                          BN_ULONG w)
+{
+    BN_ULONG c = 0;
+    BN_ULONG bl, bh;
+
+    assert(num >= 0);
+    if (num <= 0)
+        return (BN_ULONG)0;
+
+    bl = LBITS(w);
+    bh = HBITS(w);
+
+# ifndef OPENSSL_SMALL_FOOTPRINT
+    while (num & ~3) {              /* four words per iteration */
+        mul_add(rp[0], ap[0], bl, bh, c);
+        mul_add(rp[1], ap[1], bl, bh, c);
+        mul_add(rp[2], ap[2], bl, bh, c);
+        mul_add(rp[3], ap[3], bl, bh, c);
+        ap += 4;
+        rp += 4;
+        num -= 4;
+    }
+# endif
+    while (num) {                   /* remaining words */
+        mul_add(rp[0], ap[0], bl, bh, c);
+        ap++;
+        rp++;
+        num--;
+    }
+    return c;
+}
+
+
+
+/*
+ * It's argued that *length* of *significant* part of divisor is public.
+ * Even if it's private modulus that is. Again, *length* is assumed
+ * public, but not *value*. Former is likely to be pre-defined by
+ * algorithm with bit granularity, though below subroutine is invariant
+ * of limb length. Thanks to this assumption we can require that |divisor|
+ * may not be zero-padded, yet claim this subroutine "constant-time"(*).
+ * This is because zero-padded dividend, |num|, is tolerated, so that
+ * caller can pass dividend of public length(*), but with smaller amount
+ * of significant limbs. This naturally means that quotient, |dv|, would
+ * contain correspondingly less significant limbs as well, and will be zero-
+ * padded accordingly. Returned remainder, |rm|, will have same bit length
+ * as divisor, also zero-padded if needed. These actually leave sign bits
+ * in ambiguous state. In sense that we try to avoid negative zeros, while
+ * zero-padded zeros would retain sign.
+ *
+ * (*) "Constant-time-ness" has two pre-conditions:
+ *
+ *     - availability of constant-time bn_div_3_words;
+ *     - dividend is at least as "wide" as divisor, limb-wise, zero-padded
+ *       if so required, which shouldn't be a privacy problem, because
+ *       divisor's length is considered public;
+ */
+int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
+                     const BIGNUM *divisor, BN_CTX *ctx)
+{
+    int norm_shift, i, j, loop;
+    BIGNUM *tmp, *snum, *sdiv, *res;
+    BN_ULONG *resp, *wnum, *wnumtop;
+    BN_ULONG d0, d1;
+    int num_n, div_n, num_neg;
+
+    /* The divisor must be non-zero and must not carry a zero top limb. */
+    assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);
+
+    bn_check_top(num);
+    bn_check_top(divisor);
+    bn_check_top(dv);
+    bn_check_top(rm);
+
+    BN_CTX_start(ctx);
+    /* When the caller does not want the quotient, write it to a scratch BIGNUM */
+    res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
+    tmp = BN_CTX_get(ctx);
+    snum = BN_CTX_get(ctx);
+    sdiv = BN_CTX_get(ctx);
+    /*
+     * Only the last BN_CTX_get() result is tested; presumably a failed
+     * BN_CTX_get() makes every later call fail as well -- TODO confirm.
+     */
+    if (sdiv == NULL)
+        goto err;
+
+    /* First we normalise the numbers */
+    if (!BN_copy(sdiv, divisor))
+        goto err;
+    norm_shift = bn_left_align(sdiv);
+    sdiv->neg = 0;
+    /*
+     * Note that bn_lshift_fixed_top's output is always one limb longer
+     * than input, even when norm_shift is zero. This means that amount of
+     * inner loop iterations is invariant of dividend value, and that one
+     * doesn't need to compare dividend and divisor if they were originally
+     * of the same bit length.
+     */
+    if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
+        goto err;
+
+    div_n = sdiv->top;
+    num_n = snum->top;
+
+    if (num_n <= div_n) {
+        /* caller didn't pad dividend -> no constant-time guarantee... */
+        if (bn_wexpand(snum, div_n + 1) == NULL)
+            goto err;
+        /* zero-extend snum so that it is exactly one limb wider than sdiv */
+        memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
+        snum->top = num_n = div_n + 1;
+    }
+
+    /* number of quotient limbs, i.e. number of main-loop iterations */
+    loop = num_n - div_n;
+    /*
+     * Let's set up a 'window' into snum. This is the part that corresponds
+     * to the current 'area' being divided
+     */
+    wnum = &(snum->d[loop]);
+    wnumtop = &(snum->d[num_n - 1]);
+
+    /* Get the top 2 words of sdiv */
+    d0 = sdiv->d[div_n - 1];
+    d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
+
+    /* Setup quotient */
+    if (!bn_wexpand(res, loop))
+        goto err;
+    num_neg = num->neg;
+    res->neg = (num_neg ^ divisor->neg);
+    res->top = loop;
+    res->flags |= BN_FLG_FIXED_TOP;
+    /* resp starts one past the top quotient limb and is pre-decremented */
+    resp = &(res->d[loop]);
+
+    /* space for temp */
+    if (!bn_wexpand(tmp, (div_n + 1)))
+        goto err;
+
+    /* One quotient limb per iteration, most significant limb first */
+    for (i = 0; i < loop; i++, wnumtop--) {
+        BN_ULONG q, l0;
+        /*
+         * the first part of the loop uses the top two words of snum and sdiv
+         * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
+         */
+# if defined(BN_DIV3W)
+        q = bn_div_3_words(wnumtop, d1, d0);
+# else
+        BN_ULONG n0, n1, rem = 0;
+
+        n0 = wnumtop[0];
+        n1 = wnumtop[-1];
+        if (n0 == d0)
+            q = BN_MASK2;
+        else {                  /* n0 < d0 */
+            BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
+#  ifdef BN_LLONG
+            BN_ULLONG t2;
+
+#   if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
+            q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0);
+#   else
+            q = bn_div_words(n0, n1, d0);
+#   endif
+
+#   ifndef REMAINDER_IS_ALREADY_CALCULATED
+            /*
+             * rem doesn't have to be BN_ULLONG. The least we
+             * know it's less than d0, isn't it?
+             */
+            rem = (n1 - q * d0) & BN_MASK2;
+#   endif
+            t2 = (BN_ULLONG) d1 *q;
+
+            /* lower the estimate q while d1*q exceeds (rem:n2) */
+            for (;;) {
+                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2))
+                    break;
+                q--;
+                rem += d0;
+                if (rem < d0)
+                    break;      /* don't let rem overflow */
+                t2 -= d1;
+            }
+#  else                         /* !BN_LLONG */
+            BN_ULONG t2l, t2h;
+
+            q = bn_div_words(n0, n1, d0);
+#   ifndef REMAINDER_IS_ALREADY_CALCULATED
+            rem = (n1 - q * d0) & BN_MASK2;
+#   endif
+
+#   if defined(BN_UMULT_LOHI)
+            BN_UMULT_LOHI(t2l, t2h, d1, q);
+#   elif defined(BN_UMULT_HIGH)
+            t2l = d1 * q;
+            t2h = BN_UMULT_HIGH(d1, q);
+#   else
+            {
+                BN_ULONG ql, qh;
+                t2l = LBITS(d1);
+                t2h = HBITS(d1);
+                ql = LBITS(q);
+                qh = HBITS(q);
+                mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */
+            }
+#   endif
+
+            /* same correction loop as above, on the (t2h:t2l) word pair */
+            for (;;) {
+                if ((t2h < rem) || ((t2h == rem) && (t2l <= n2)))
+                    break;
+                q--;
+                rem += d0;
+                if (rem < d0)
+                    break;      /* don't let rem overflow */
+                if (t2l < d1)
+                    t2h--;
+                t2l -= d1;
+            }
+#  endif                        /* !BN_LLONG */
+        }
+# endif                         /* !BN_DIV3W */
+
+        /* tmp = sdiv * q, div_n + 1 limbs including the carry-out limb */
+        l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
+        tmp->d[div_n] = l0;
+        wnum--;
+        /*
+         * ignore top values of the bignums just sub the two BN_ULONG arrays
+         * with bn_sub_words
+         */
+        l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
+        /* l0 is the borrow; a borrow means q was one too large */
+        q -= l0;
+        /*
+         * Note: As we have considered only the leading two BN_ULONGs in
+         * the calculation of q, sdiv * q might be greater than wnum (but
+         * then (q-1) * sdiv is less or equal than wnum)
+         */
+        /*
+         * 0 - l0 turns the borrow into a mask (0 or all ones), so sdiv is
+         * added back only when the subtraction borrowed, without branching.
+         */
+        for (l0 = 0 - l0, j = 0; j < div_n; j++)
+            tmp->d[j] = sdiv->d[j] & l0;
+        l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
+        (*wnumtop) += l0;
+        assert((*wnumtop) == 0);
+
+        /* store part of the result */
+        *--resp = q;
+    }
+    /* snum holds remainder, it's as wide as divisor */
+    snum->neg = num_neg;
+    snum->top = div_n;
+    snum->flags |= BN_FLG_FIXED_TOP;
+
+    /* undo the normalisation shift when handing the remainder back */
+    if (rm != NULL && bn_rshift_fixed_top(rm, snum, norm_shift) == 0)
+        goto err;
+
+    BN_CTX_end(ctx);
+    return 1;
+ err:
+    bn_check_top(rm);
+    BN_CTX_end(ctx);
+    return 0;
+}
+
+/*
+ * Schoolbook multiplication of two limb arrays: r = a * b, where |a| holds
+ * |na| limbs and |b| holds |nb| limbs. The operands are swapped first so
+ * that |a| is the longer one. The first row is produced by bn_mul_words(),
+ * each further row is accumulated by bn_mul_add_words(); the value returned
+ * by each call is stored as the top limb of that row.
+ */
+void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
+{
+    int row;
+
+    if (na < nb) {
+        BN_ULONG *swap_ptr = a;
+        int swap_len = na;
+
+        a = b;
+        b = swap_ptr;
+        na = nb;
+        nb = swap_len;
+    }
+
+    if (nb <= 0) {
+        /* nothing to multiply by: the low na limbs become a * 0 */
+        (void)bn_mul_words(r, a, na, 0);
+        return;
+    }
+
+    r[na] = bn_mul_words(r, a, na, b[0]);
+    for (row = 1; row < nb; row++)
+        r[na + row] = bn_mul_add_words(&(r[row]), a, na, b[row]);
+}
+
+/*
+ * r = a * b without trimming leading zero limbs: the result always has
+ * a->top + b->top limbs and is flagged BN_FLG_FIXED_TOP.
+ *
+ * When |r| aliases |a| or |b| the product is built in a BN_CTX temporary
+ * and copied into |r| at the end. Depending on the build configuration the
+ * work is done by bn_mul_comba8 (BN_MUL_COMBA, both operands 8 limbs), the
+ * recursive routines (BN_RECURSION) or bn_mul_normal.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    int ret = 0;
+    int top, al, bl;
+    BIGNUM *rr;
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+    int i;
+#endif
+#ifdef BN_RECURSION
+    BIGNUM *t = NULL;
+    int j = 0, k;
+#endif
+
+    bn_check_top(a);
+    bn_check_top(b);
+    bn_check_top(r);
+
+    al = a->top;
+    bl = b->top;
+
+    /* anything times zero is zero; no BN_CTX frame is opened on this path */
+    if ((al == 0) || (bl == 0)) {
+        BN_zero(r);
+        return 1;
+    }
+    top = al + bl;
+
+    BN_CTX_start(ctx);
+    /* the output must not overlap an input while it is being computed */
+    if ((r == a) || (r == b)) {
+        if ((rr = BN_CTX_get(ctx)) == NULL)
+            goto err;
+    } else
+        rr = r;
+
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+    /* difference in limb counts between the two operands */
+    i = al - bl;
+#endif
+#ifdef BN_MUL_COMBA
+    if (i == 0) {
+# if 0
+        if (al == 4) {
+            if (bn_wexpand(rr, 8) == NULL)
+                goto err;
+            rr->top = 8;
+            bn_mul_comba4(rr->d, a->d, b->d);
+            goto end;
+        }
+# endif
+        if (al == 8) {
+            if (bn_wexpand(rr, 16) == NULL)
+                goto err;
+            rr->top = 16;
+            bn_mul_comba8(rr->d, a->d, b->d);
+            goto end;
+        }
+    }
+#endif                          /* BN_MUL_COMBA */
+#ifdef BN_RECURSION
+    if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
+        /* the recursive path is only taken for operands of nearly equal length */
+        if (i >= -1 && i <= 1) {
+            /*
+             * Find out the power of two lower or equal to the longest of the
+             * two numbers
+             */
+            if (i >= 0) {
+                j = BN_num_bits_word((BN_ULONG)al);
+            }
+            if (i == -1) {
+                j = BN_num_bits_word((BN_ULONG)bl);
+            }
+            j = 1 << (j - 1);
+            assert(j <= al || j <= bl);
+            k = j + j;
+            t = BN_CTX_get(ctx);
+            if (t == NULL)
+                goto err;
+            if (al > j || bl > j) {
+                if (bn_wexpand(t, k * 4) == NULL)
+                    goto err;
+                if (bn_wexpand(rr, k * 4) == NULL)
+                    goto err;
+                bn_mul_part_recursive(rr->d, a->d, b->d,
+                                      j, al - j, bl - j, t->d);
+            } else {            /* al <= j && bl <= j */
+
+                if (bn_wexpand(t, k * 2) == NULL)
+                    goto err;
+                if (bn_wexpand(rr, k * 2) == NULL)
+                    goto err;
+                bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
+            }
+            rr->top = top;
+            goto end;
+        }
+    }
+#endif                          /* BN_RECURSION */
+    /* generic fallback: schoolbook multiplication */
+    if (bn_wexpand(rr, top) == NULL)
+        goto err;
+    rr->top = top;
+    bn_mul_normal(rr->d, a->d, al, b->d, bl);
+
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+ end:
+#endif
+    rr->neg = a->neg ^ b->neg;
+    rr->flags |= BN_FLG_FIXED_TOP;
+    /* copy back when the product had to be computed in a temporary */
+    if (r != rr && BN_copy(r, rr) == NULL)
+        goto err;
+
+    ret = 1;
+ err:
+    bn_check_top(r);
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+/*
+ * Normalise |a|: drop leading zero limbs from a->top, clear the sign of a
+ * zero value and remove the BN_FLG_FIXED_TOP marker.
+ */
+void bn_correct_top(BIGNUM *a)
+{
+    int used = a->top;
+
+    if (used > 0) {
+        /* walk down from the most significant limb to the first non-zero one */
+        while (used > 0 && a->d[used - 1] == 0)
+            used--;
+        a->top = used;
+    }
+
+    /* never leave a negative zero behind */
+    if (a->top == 0)
+        a->neg = 0;
+
+    a->flags &= ~BN_FLG_FIXED_TOP;
+    bn_pollute(a);
+}
+
+/*
+ * Compute dv = num / divisor and rm = num % divisor; either of |dv| and
+ * |rm| may be NULL when that result is not wanted. Both results are
+ * normalised with bn_correct_top() before returning.
+ *
+ * Returns 1 on success and 0 on failure. A zero divisor, or a divisor with
+ * a zero top limb, trips an assertion in debug builds and makes the
+ * function return 0 when assertions are compiled out (NDEBUG); no error is
+ * queued in this port.
+ */
+int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
+           BN_CTX *ctx)
+{
+    int ret;
+
+    if (BN_is_zero(divisor)) {
+        /* division by zero */
+        assert(0);
+        /* reached only with NDEBUG: never fall through into d[top - 1] */
+        return 0;
+    }
+
+    /*
+     * Invalid zero-padding would have particularly bad consequences so don't
+     * just rely on bn_check_top() here (bn_check_top() works only for
+     * BN_DEBUG builds)
+     */
+    if (divisor->d[divisor->top - 1] == 0) {
+        /* divisor is not properly initialised */
+        assert(0);
+        return 0;
+    }
+
+    ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
+
+    if (ret) {
+        if (dv != NULL)
+            bn_correct_top(dv);
+        if (rm != NULL)
+            bn_correct_top(rm);
+    }
+
+    return ret;
+}
+
+
+
+
+
+/*
+ * Square each of the |n| limbs of |a| individually: limb a[i] yields the
+ * two-limb value stored in r[2*i] and r[2*i + 1] via sqr64().
+ */
+void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
+{
+    assert(n >= 0);
+    if (n <= 0)
+        return;
+
+# ifndef OPENSSL_SMALL_FOOTPRINT
+    /* four limbs per pass for as long as at least four remain */
+    for (; (n & ~3) != 0; n -= 4, a += 4, r += 8) {
+        sqr64(r[0], r[1], a[0]);
+        sqr64(r[2], r[3], a[1]);
+        sqr64(r[4], r[5], a[2]);
+        sqr64(r[6], r[7], a[3]);
+    }
+# endif
+    /* leftover limbs, one at a time */
+    for (; n != 0; n--, a++, r += 2) {
+        sqr64(r[0], r[1], a[0]);
+    }
+}
+
+
+/*
+ * Schoolbook squaring: r = a * a for an |n|-limb input, producing 2*n
+ * limbs. The cross products a[i]*a[j] (i < j) are accumulated once, the
+ * sum is doubled, and the per-limb squares from bn_sqr_words() are added.
+ *
+ * tmp must have 2*n words
+ */
+void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
+{
+    const int max = n * 2;
+    int k, len;
+
+    r[0] = r[max - 1] = 0;
+
+    /* first row: a[0] times a[1..n-1], written starting at r[1] */
+    len = n - 1;
+    if (len > 0)
+        r[n] = bn_mul_words(&(r[1]), &(a[1]), len, a[0]);
+
+    /* remaining rows: a[k] times a[k+1..n-1], accumulated at r[2k+1] */
+    for (k = 1; k <= n - 2; k++) {
+        len = n - 1 - k;
+        r[n + k] = bn_mul_add_words(&(r[2 * k + 1]), &(a[k + 1]), len, a[k]);
+    }
+
+    /* double the sum of cross products */
+    bn_add_words(r, r, r, max);
+
+    /* There will not be a carry */
+
+    bn_sqr_words(tmp, a, n);
+
+    bn_add_words(r, r, tmp, max);
+}
+
+/*
+ * r = a * a without trimming leading zero limbs: for a non-empty |a| the
+ * result has 2 * a->top limbs, is non-negative and is flagged
+ * BN_FLG_FIXED_TOP. When |r| aliases |a| the square is built in a BN_CTX
+ * temporary and copied into |r| at the end.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int bn_sqr_fixed_top(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
+{
+    int max, al;
+    int ret = 0;
+    BIGNUM *tmp, *rr;
+
+    bn_check_top(a);
+
+    al = a->top;
+    /* the square of an empty value is zero; no BN_CTX frame is opened */
+    if (al <= 0) {
+        r->top = 0;
+        r->neg = 0;
+        return 1;
+    }
+
+    BN_CTX_start(ctx);
+    rr = (a != r) ? r : BN_CTX_get(ctx);
+    tmp = BN_CTX_get(ctx);
+    if (rr == NULL || tmp == NULL)
+        goto err;
+
+    max = 2 * al;               /* Non-zero (from above) */
+    if (bn_wexpand(rr, max) == NULL)
+        goto err;
+
+    /* 4- and 8-limb inputs get dedicated paths with on-stack scratch space */
+    if (al == 4) {
+#ifndef BN_SQR_COMBA
+        BN_ULONG t[8];
+        bn_sqr_normal(rr->d, a->d, 4, t);
+#else
+        bn_sqr_comba4(rr->d, a->d);
+#endif
+    } else if (al == 8) {
+#ifndef BN_SQR_COMBA
+        BN_ULONG t[16];
+        bn_sqr_normal(rr->d, a->d, 8, t);
+#else
+        bn_sqr_comba8(rr->d, a->d);
+#endif
+    } else {
+#if defined(BN_RECURSION)
+        if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
+            BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
+            bn_sqr_normal(rr->d, a->d, al, t);
+        } else {
+            int j, k;
+
+            /* j = largest power of two not exceeding al */
+            j = BN_num_bits_word((BN_ULONG)al);
+            j = 1 << (j - 1);
+            k = j + j;
+            /* the recursive routine is used only when al is a power of two */
+            if (al == j) {
+                if (bn_wexpand(tmp, k * 2) == NULL)
+                    goto err;
+                bn_sqr_recursive(rr->d, a->d, al, tmp->d);
+            } else {
+                if (bn_wexpand(tmp, max) == NULL)
+                    goto err;
+                bn_sqr_normal(rr->d, a->d, al, tmp->d);
+            }
+        }
+#else
+        if (bn_wexpand(tmp, max) == NULL)
+            goto err;
+        bn_sqr_normal(rr->d, a->d, al, tmp->d);
+#endif
+    }
+
+    rr->neg = 0;
+    rr->top = max;
+    rr->flags |= BN_FLG_FIXED_TOP;
+    /* copy back when the square had to be computed in a temporary */
+    if (r != rr && BN_copy(r, rr) == NULL)
+        goto err;
+
+    ret = 1;
+ err:
+    bn_check_top(rr);
+    bn_check_top(tmp);
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+/*
+ * r = a >> n, normalised with bn_correct_top().
+ *
+ * Returns the result of bn_rshift_fixed_top(). A negative |n| trips an
+ * assertion in debug builds and makes the function return 0 when
+ * assertions are compiled out (NDEBUG); no error is queued in this port.
+ */
+int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int ret;
+
+    if (n < 0) {
+        /* invalid shift count */
+        assert(0);
+        /* reached only with NDEBUG: never shift by a negative amount */
+        return 0;
+    }
+
+    ret = bn_rshift_fixed_top(r, a, n);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+/*
+ * r = a * b, normalised. Thin wrapper around bn_mul_fixed_top() whose
+ * return value is passed through unchanged.
+ */
+int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
+{
+    const int ok = bn_mul_fixed_top(r, a, b, ctx);
+
+    /* normalisation runs regardless of the outcome */
+    bn_correct_top(r);
+    bn_check_top(r);
+    return ok;
+}
+
+/*
+ * Truncate |a| in place to its lowest |n| bits.
+ *
+ * Returns 1 on success, 0 when |n| is negative or when bit |n| lies at or
+ * beyond the limbs currently in use (in which case |a| is left untouched).
+ */
+int BN_mask_bits(BIGNUM *a, int n)
+{
+    int word_idx, bit_idx;
+
+    bn_check_top(a);
+    if (n < 0)
+        return 0;
+
+    word_idx = n / BN_BITS2;
+    if (word_idx >= a->top)
+        return 0;
+
+    bit_idx = n % BN_BITS2;
+    if (bit_idx != 0) {
+        /* keep the partial limb and clear its bits from bit_idx upwards */
+        a->top = word_idx + 1;
+        a->d[word_idx] &= ~(BN_MASK2 << bit_idx);
+    } else {
+        /* cut falls exactly on a limb boundary */
+        a->top = word_idx;
+    }
+
+    bn_correct_top(a);
+    return 1;
+}
+
+/*
+ * Word-by-word Montgomery reduction of |r| modulo mont->N; the result is
+ * written to |ret| with exactly mont->N.top limbs and BN_FLG_FIXED_TOP set.
+ *
+ * |r| is used as working storage: it is expanded to 2 * nl limbs, modified
+ * in place, and its upper nl limbs are zeroed before returning.
+ *
+ * Returns 1 on success and 0 if a bn_wexpand() call fails. When the modulus
+ * has no limbs, ret->top is set to 0 and 1 is returned.
+ */
+static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
+{
+    BIGNUM *n;
+    BN_ULONG *ap, *np, *rp, n0, v, carry;
+    int nl, max, i;
+    unsigned int rtop;
+
+    n = &(mont->N);
+    nl = n->top;
+    if (nl == 0) {
+        ret->top = 0;
+        return 1;
+    }
+
+    max = (2 * nl);             /* carry is stored separately */
+    if (bn_wexpand(r, max) == NULL)
+        return 0;
+
+    r->neg ^= n->neg;
+    np = n->d;
+    rp = r->d;
+
+    /* clear the top words of T */
+    /*
+     * Branch-free: (i - rtop) wraps around and has its top bit set exactly
+     * when i < rtop, so v is all ones for limbs below the original top and
+     * zero for the limbs at or above it.
+     */
+    for (rtop = r->top, i = 0; i < max; i++) {
+        v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1));
+        rp[i] &= v;
+    }
+
+    r->top = max;
+    r->flags |= BN_FLG_FIXED_TOP;
+    n0 = mont->n0[0];
+
+    /*
+     * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
+     * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
+     * includes |carry| which is stored separately.
+     */
+    for (carry = 0, i = 0; i < nl; i++, rp++) {
+        v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
+        v = (v + carry + rp[nl]) & BN_MASK2;
+        /* update the carry-out of that addition without branching */
+        carry |= (v != rp[nl]);
+        carry &= (v <= rp[nl]);
+        rp[nl] = v;
+    }
+
+    if (bn_wexpand(ret, nl) == NULL)
+        return 0;
+    ret->top = nl;
+    ret->flags |= BN_FLG_FIXED_TOP;
+    ret->neg = r->neg;
+
+    rp = ret->d;
+
+    /*
+     * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap|
+     * includes |carry| which is stored separately.
+     */
+    ap = &(r->d[nl]);
+
+    carry -= bn_sub_words(rp, ap, np, nl);
+    /*
+     * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note
+     * |carry| cannot be 1. That would imply the subtraction did not fit in
+     * |nl| words, and we know at most one subtraction is needed.
+     */
+    /* select ap (no subtraction) or ap - np limb by limb, then wipe ap */
+    for (i = 0; i < nl; i++) {
+        rp[i] = (carry & ap[i]) | (~carry & rp[i]);
+        ap[i] = 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Montgomery-reduce |a| with the parameters in |mont| and store the result
+ * in |ret|. |a| itself is not modified; the work is done on a BN_CTX copy.
+ *
+ * With MONT_WORD defined the copy is handed to bn_from_montgomery_word();
+ * otherwise the reduction is spelled out with BN_mask_bits, BN_mul, BN_add
+ * and BN_rshift, followed by one conditional subtraction of mont->N.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+                           BN_CTX *ctx)
+{
+    int retn = 0;
+#ifdef MONT_WORD
+    BIGNUM *t;
+
+    BN_CTX_start(ctx);
+    /* work on a copy because the word-level routine clobbers its input */
+    if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) {
+        retn = bn_from_montgomery_word(ret, t, mont);
+    }
+    BN_CTX_end(ctx);
+#else                           /* !MONT_WORD */
+    BIGNUM *t1, *t2;
+
+    BN_CTX_start(ctx);
+    t1 = BN_CTX_get(ctx);
+    t2 = BN_CTX_get(ctx);
+    if (t2 == NULL)
+        goto err;
+
+    /* t1 = a with only its low mont->ri bits kept */
+    if (!BN_copy(t1, a))
+        goto err;
+    /* NOTE(review): the BN_mask_bits() return values are not checked here */
+    BN_mask_bits(t1, mont->ri);
+
+    /* t2 = t1 * Ni, again truncated to mont->ri bits */
+    if (!BN_mul(t2, t1, &mont->Ni, ctx))
+        goto err;
+    BN_mask_bits(t2, mont->ri);
+
+    /* ret = (a + t2 * N) >> ri */
+    if (!BN_mul(t1, t2, &mont->N, ctx))
+        goto err;
+    if (!BN_add(t2, a, t1))
+        goto err;
+    if (!BN_rshift(ret, t2, mont->ri))
+        goto err;
+
+    /* one conditional subtraction when ret >= N in magnitude */
+    if (BN_ucmp(ret, &(mont->N)) >= 0) {
+        if (!BN_usub(ret, ret, &(mont->N)))
+            goto err;
+    }
+    retn = 1;
+    bn_check_top(ret);
+ err:
+    BN_CTX_end(ctx);
+#endif                          /* MONT_WORD */
+    return retn;
+}
+
+/*
+ * Montgomery-reduce |a| into |ret| and normalise the result. Thin wrapper
+ * around bn_from_mont_fixed_top() whose return value is passed through.
+ */
+int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+                       BN_CTX *ctx)
+{
+    const int ok = bn_from_mont_fixed_top(ret, a, mont, ctx);
+
+    /* normalisation runs regardless of the outcome */
+    bn_correct_top(ret);
+    bn_check_top(ret);
+    return ok;
+}
+
+/*
+ * Montgomery multiplication: multiply |a| by |b| (squaring when they are
+ * the same object) and Montgomery-reduce the product with |mont| into |r|.
+ *
+ * With OPENSSL_BN_ASM_MONT and MONT_WORD defined, operands that have
+ * exactly as many limbs as the modulus are first offered to bn_mul_mont();
+ * the generic multiply-then-reduce path is used otherwise or when that
+ * routine declines.
+ *
+ * Returns 1 on success and 0 on failure, including the case where
+ * a->top + b->top exceeds twice the limb count of the modulus.
+ */
+int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
+                          BN_MONT_CTX *mont, BN_CTX *ctx)
+{
+    BIGNUM *tmp;
+    int ret = 0;
+    int num = mont->N.top;
+
+#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
+    if (num > 1 && num <= BN_SOFT_LIMIT && a->top == num && b->top == num) {
+        if (bn_wexpand(r, num) == NULL)
+            return 0;
+        if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
+            r->neg = a->neg ^ b->neg;
+            r->top = num;
+            r->flags |= BN_FLG_FIXED_TOP;
+            return 1;
+        }
+    }
+#endif
+
+    /* the product must fit into the 2 * num limbs the reduction works on */
+    if ((a->top + b->top) > 2 * num)
+        return 0;
+
+    BN_CTX_start(ctx);
+    tmp = BN_CTX_get(ctx);
+    if (tmp == NULL)
+        goto err;
+
+    bn_check_top(tmp);
+    /* same object on both sides: use the dedicated squaring routine */
+    if (a == b) {
+        if (!bn_sqr_fixed_top(tmp, a, ctx))
+            goto err;
+    } else {
+        if (!bn_mul_fixed_top(tmp, a, b, ctx))
+            goto err;
+    }
+    /* reduce from aRR to aR */
+#ifdef MONT_WORD
+    if (!bn_from_montgomery_word(r, tmp, mont))
+        goto err;
+#else
+    if (!BN_from_montgomery(r, tmp, mont, ctx))
+        goto err;
+#endif
+    ret = 1;
+ err:
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+
+/*
+ * Convert |a| into Montgomery form by Montgomery-multiplying it with the
+ * RR value stored in |mont|; the result goes to |r|. Returns whatever
+ * bn_mul_mont_fixed_top() returns.
+ */
+int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont,
+                         BN_CTX *ctx)
+{
+    const BIGNUM *rr = &(mont->RR);
+
+    return bn_mul_mont_fixed_top(r, a, rr, mont, ctx);
+}
+
+/*
+ * Push |idx| onto the index stack, growing the backing array when it is
+ * full (to BN_CTX_START_FRAMES entries initially, then to 3/2 of the
+ * current size). Returns 1 on success and 0 when the allocation fails.
+ */
+static int BN_STACK_push(BN_STACK *st, unsigned int idx)
+{
+    if (st->depth == st->size) {
+        unsigned int grown;
+        unsigned int *fresh;
+
+        if (st->size)
+            grown = st->size * 3 / 2;
+        else
+            grown = BN_CTX_START_FRAMES;
+
+        fresh = OPENSSL_malloc(sizeof(*fresh) * grown);
+        if (fresh == NULL)
+            return 0;
+
+        /* carry the existing entries over, then drop the old array */
+        if (st->depth)
+            memcpy(fresh, st->indexes, sizeof(*fresh) * st->depth);
+        OPENSSL_free(st->indexes);
+        st->indexes = fresh;
+        st->size = grown;
+    }
+
+    st->indexes[st->depth] = idx;
+    st->depth++;
+    return 1;
+}
+
+/* Remove and return the most recently pushed index. No emptiness check. */
+static unsigned int BN_STACK_pop(BN_STACK *st)
+{
+    st->depth--;
+    return st->indexes[st->depth];
+}
+
+/* Put |st| into the empty state: no backing array, zero depth and size. */
+static void BN_STACK_init(BN_STACK *st)
+{
+    st->size = 0;
+    st->depth = 0;
+    st->indexes = NULL;
+}
+
+/* Release the backing array of |st| and leave no dangling pointer behind. */
+static void BN_STACK_finish(BN_STACK *st)
+{
+    unsigned int *old = st->indexes;
+
+    OPENSSL_free(old);
+    st->indexes = NULL;
+}
+
+/* Put |p| into the empty state: no pool items, nothing used, zero size. */
+static void BN_POOL_init(BN_POOL *p)
+{
+    p->tail = NULL;
+    p->current = NULL;
+    p->head = NULL;
+    p->size = 0;
+    p->used = 0;
+}
+
+/* Function-pointer type matching the signature of memset(). */
+typedef void *(*memset_t)(void *, int, size_t);
+
+/*
+ * memset() reached through a volatile pointer -- presumably so that the
+ * compiler cannot prove which function is called and therefore cannot
+ * optimise the zeroing away as a dead store.
+ */
+static volatile memset_t memset_func = memset;
+
+/* Overwrite |len| bytes starting at |ptr| with zeros via memset_func. */
+void OPENSSL_cleanse(void *ptr, size_t len)
+{
+    memset_func(ptr, 0, len);
+}
+
+
+/*
+ * Dispose of |a|. The limb array is released through bn_free_d(a, 1)
+ * unless it is absent or flagged BN_FLG_STATIC_DATA; the BIGNUM structure
+ * itself is wiped and freed only when flagged BN_FLG_MALLOCED. NULL is
+ * accepted and ignored.
+ */
+void BN_clear_free(BIGNUM *a)
+{
+    if (a == NULL)
+        return;
+
+    if (a->d != NULL) {
+        if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
+            bn_free_d(a, 1);
+    }
+
+    if (!BN_get_flags(a, BN_FLG_MALLOCED))
+        return;
+
+    OPENSSL_cleanse(a, sizeof(*a));
+    OPENSSL_free(a);
+}
+
+/*
+ * Tear down the pool: for every pool item, clear-free each BIGNUM that owns
+ * a limb array, then free the item itself. On return p->head is NULL.
+ */
+static void BN_POOL_finish(BN_POOL *p)
+{
+    while (p->head != NULL) {
+        BN_POOL_ITEM *item = p->head;
+        unsigned int slot;
+
+        for (slot = 0; slot < BN_CTX_POOL_SIZE; slot++) {
+            BIGNUM *bn = &(item->vals[slot]);
+
+            if (bn->d)
+                BN_clear_free(bn);
+        }
+
+        /* advance to the successor before the item is released */
+        p->current = item->next;
+        OPENSSL_free(item);
+        p->head = p->current;
+    }
+}
+
+/* Turn on every flag bit of |n| in |b|; bits already set stay set. */
+void BN_set_flags(BIGNUM *b, int n)
+{
+    b->flags = b->flags | n;
+}
+
+/*
+ * Hand out the next BIGNUM of the pool. When every slot is in use a new
+ * pool item of BN_CTX_POOL_SIZE BIGNUMs is allocated, initialised (flagged
+ * BN_FLG_SECURE if |flag| requests it) and appended to the item list.
+ * Returns NULL only when that allocation fails.
+ */
+static BIGNUM *BN_POOL_get(BN_POOL *p, int flag)
+{
+    BN_POOL_ITEM *item;
+    unsigned int slot;
+
+    if (p->used != p->size) {
+        /* A previously allocated slot is free: step to it and reuse it. */
+        slot = p->used % BN_CTX_POOL_SIZE;
+        if (!p->used)
+            p->current = p->head;
+        else if (slot == 0)
+            p->current = p->current->next;
+        p->used++;
+        return p->current->vals + slot;
+    }
+
+    /* Full; allocate a new pool item and link it in. */
+    item = OPENSSL_malloc(sizeof(*item));
+    if (item == NULL)
+        return NULL;
+
+    for (slot = 0; slot < BN_CTX_POOL_SIZE; slot++) {
+        BIGNUM *bn = item->vals + slot;
+
+        bn_init(bn);
+        if ((flag & BN_FLG_SECURE) != 0)
+            BN_set_flags(bn, BN_FLG_SECURE);
+    }
+
+    item->next = NULL;
+    item->prev = p->tail;
+    if (p->head != NULL)
+        p->tail->next = item;
+    else
+        p->head = item;
+    p->tail = item;
+    p->current = item;
+
+    p->size += BN_CTX_POOL_SIZE;
+    p->used++;
+    /* Return the first bignum from the new pool */
+    return item->vals;
+}
+
+/*
+ * Give back the |num| most recently handed-out BIGNUMs: p->used is
+ * lowered by |num| and p->current is walked backwards across pool items
+ * as slot boundaries are crossed.
+ */
+static void BN_POOL_release(BN_POOL *p, unsigned int num)
+{
+    /* slot index, inside the current item, of the last BIGNUM handed out */
+    unsigned int slot = (p->used - 1) % BN_CTX_POOL_SIZE;
+
+    p->used -= num;
+    for (; num != 0; num--) {
+        bn_check_top(p->current->vals + slot);
+        if (slot != 0) {
+            slot--;
+        } else {
+            /* stepped off the front of this item: continue in the previous one */
+            slot = BN_CTX_POOL_SIZE - 1;
+            p->current = p->current->prev;
+        }
+    }
+}
+
+/*
+ * Return a pointer to a shared, statically allocated BIGNUM constant whose
+ * single limb is 1. The object is const and flagged BN_FLG_STATIC_DATA;
+ * callers must not modify or free it.
+ */
+const BIGNUM *BN_value_one(void)
+{
+    static const BN_ULONG data_one = 1L;
+    /*
+     * Positional initialiser; presumably the field order is
+     * { d, top, dmax, neg, flags } -- confirm against the BIGNUM definition.
+     */
+    static const BIGNUM const_one =
+        { (BN_ULONG *)&data_one, 1, 1, 0, BN_FLG_STATIC_DATA };
+
+    return &const_one;
+}
+
+/*
+ * Return 1 when bit |n| of the magnitude of |a| is set, 0 otherwise. A
+ * negative |n| or a bit beyond the limbs in use counts as not set.
+ */
+int BN_is_bit_set(const BIGNUM *a, int n)
+{
+    int limb, bit;
+
+    bn_check_top(a);
+    if (n < 0)
+        return 0;
+
+    limb = n / BN_BITS2;
+    if (limb >= a->top)
+        return 0;
+
+    bit = n % BN_BITS2;
+    return (int)((a->d[limb] >> bit) & ((BN_ULONG)1));
+}
+
+/*
+ * Set bit |n| of |a|, growing and zero-filling |a| when the bit lies beyond
+ * the limbs currently in use. Returns 1 on success, 0 when |n| is negative
+ * or the expansion fails.
+ */
+int BN_set_bit(BIGNUM *a, int n)
+{
+    int limb, bit;
+
+    if (n < 0)
+        return 0;
+
+    limb = n / BN_BITS2;
+    bit = n % BN_BITS2;
+
+    if (limb >= a->top) {
+        int fill;
+
+        if (bn_wexpand(a, limb + 1) == NULL)
+            return 0;
+        /* zero every limb between the old top and the target limb */
+        fill = a->top;
+        while (fill <= limb) {
+            a->d[fill] = 0;
+            fill++;
+        }
+        a->top = limb + 1;
+        a->flags &= ~BN_FLG_FIXED_TOP;
+    }
+
+    a->d[limb] |= (((BN_ULONG)1) << bit);
+    bn_check_top(a);
+    return 1;
+}
+
+/*
+ * Allocate a zero-initialised BN_CTX with an empty pool and an empty stack,
+ * bound to the library context |ctx|. Returns NULL when allocation fails.
+ */
+BN_CTX *BN_CTX_new_ex(OSSL_LIB_CTX *ctx)
+{
+    BN_CTX *bnctx = OPENSSL_zalloc(sizeof(*bnctx));
+
+    if (bnctx != NULL) {
+        /* Initialise the structure */
+        BN_POOL_init(&bnctx->pool);
+        BN_STACK_init(&bnctx->stack);
+        bnctx->libctx = ctx;
+    }
+    return bnctx;
+}
+
+/*
+ * Allocate a zero-initialised BIGNUM flagged BN_FLG_MALLOCED. Returns NULL
+ * when allocation fails.
+ */
+BIGNUM *BN_new(void)
+{
+    BIGNUM *bn = OPENSSL_zalloc(sizeof(*bn));
+
+    if (bn == NULL)
+        return NULL;
+
+    bn->flags = BN_FLG_MALLOCED;
+    bn_check_top(bn);
+    return bn;
+}
+
+/* Return 1 when |a| has at least one limb and its lowest bit is set, else 0. */
+int BN_is_odd(const BIGNUM *a)
+{
+    if (a->top <= 0)
+        return 0;
+    return (a->d[0] & 1) != 0;
+}
+
+/*
+ * r = a << 1. |r| may alias |a|. The result gains one limb when the top
+ * bit of |a| is shifted out. Returns 1 on success, 0 if expansion fails.
+ */
+int BN_lshift1(BIGNUM *r, const BIGNUM *a)
+{
+    BN_ULONG word, carry = 0;
+    int k;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    if (r != a)
+        r->neg = a->neg;
+    if (bn_wexpand(r, a->top + 1) == NULL)
+        return 0;
+    if (r != a)
+        r->top = a->top;
+
+    /* shift limb by limb, carrying each limb's top bit into the next one */
+    for (k = 0; k < a->top; k++) {
+        word = a->d[k];
+        r->d[k] = ((word << 1) | carry) & BN_MASK2;
+        carry = word >> (BN_BITS2 - 1);
+    }
+
+    /* the final carry becomes a new top limb (and counts only when set) */
+    r->d[k] = carry;
+    r->top += carry;
+    bn_check_top(r);
+    return 1;
+}
+
+/*
+ * r = a >> 1. |r| may alias |a|. The result loses its top limb when that
+ * limb was exactly 1, and a zero result is never negative. Returns 1 on
+ * success, 0 if expansion fails.
+ */
+int BN_rshift1(BIGNUM *r, const BIGNUM *a)
+{
+    const BN_ULONG *src;
+    BN_ULONG *dst;
+    BN_ULONG word, carry;
+    int top, k;
+
+    bn_check_top(r);
+    bn_check_top(a);
+
+    if (BN_is_zero(a)) {
+        BN_zero(r);
+        return 1;
+    }
+
+    top = a->top;
+    src = a->d;
+    if (a != r) {
+        if (bn_wexpand(r, top) == NULL)
+            return 0;
+        r->neg = a->neg;
+    }
+    dst = r->d;
+
+    /* top limb first: it decides whether the result shrinks by one limb */
+    word = src[top - 1];
+    dst[top - 1] = word >> 1;
+    carry = word << (BN_BITS2 - 1);
+    r->top = top - (word == 1);
+
+    /* remaining limbs, each receiving the low bit of the limb above it */
+    for (k = top - 2; k >= 0; k--) {
+        word = src[k];
+        dst[k] = ((word >> 1) & BN_MASK2) | carry;
+        carry = word << (BN_BITS2 - 1);
+    }
+
+    if (r->top == 0)
+        r->neg = 0; /* don't allow negative zero */
+    bn_check_top(r);
+    return 1;
+}
+
+/*
+ * Return 1 when |a| equals the word |w|: its magnitude must be |w| and,
+ * unless |w| is zero, |a| must not be negative. Returns 0 otherwise.
+ */
+int BN_is_word(const BIGNUM *a, const BN_ULONG w)
+{
+    if (!BN_abs_is_word(a, w))
+        return 0;
+    return (w == 0) || !a->neg;
+}
+
+/* Return 1 when |a| is exactly +1 (magnitude 1, not negative), else 0. */
+int BN_is_one(const BIGNUM *a)
+{
+    if (!BN_abs_is_word(a, 1))
+        return 0;
+    return !a->neg;
+}
+
+/*
+ * r = a << n, normalised with bn_correct_top().
+ *
+ * Returns the result of bn_lshift_fixed_top(). A negative |n| trips an
+ * assertion in debug builds and makes the function return 0 when
+ * assertions are compiled out (NDEBUG); no error is queued in this port.
+ */
+int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
+{
+    int ret;
+
+    if (n < 0) {
+        /* invalid shift count */
+        assert(0);
+        /* reached only with NDEBUG: never shift by a negative amount */
+        return 0;
+    }
+
+    ret = bn_lshift_fixed_top(r, a, n);
+
+    bn_correct_top(r);
+    bn_check_top(r);
+
+    return ret;
+}
+
+/*
+ * a = a * w, in place. A zero |a| is left untouched; multiplying by zero
+ * zeroes |a|; otherwise a carry out of the top limb is appended as a new
+ * limb. Returns 1 on success, 0 if the expansion for that limb fails.
+ */
+int BN_mul_word(BIGNUM *a, BN_ULONG w)
+{
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    if (a->top != 0 && w == 0) {
+        BN_zero(a);
+    } else if (a->top != 0) {
+        BN_ULONG carry = bn_mul_words(a->d, a->d, a->top, w);
+
+        if (carry != 0) {
+            if (bn_wexpand(a, a->top + 1) == NULL)
+                return 0;
+            a->d[a->top] = carry;
+            a->top++;
+        }
+    }
+
+    bn_check_top(a);
+    return 1;
+}
+
+/*
+ * This is an internal function, we assume all callers pass valid arguments:
+ * all pointers passed here are assumed non-NULL.
+ */
+BIGNUM *int_bn_mod_inverse(BIGNUM *in,
+                           const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
+                           int *pnoinv)
+{
+    BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL;
+    BIGNUM *ret = NULL;
+    int sign;
+
+    /* This is invalid input so we don't worry about constant time here */
+    if (BN_abs_is_word(n, 1) || BN_is_zero(n)) {
+        *pnoinv = 1;
+        return NULL;
+    }
+
+    *pnoinv = 0;
+/*
+    if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0)
+        || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) {
+        return bn_mod_inverse_no_branch(in, a, n, ctx, pnoinv);
+    }
+*/
+    bn_check_top(a);
+    bn_check_top(n);
+
+    BN_CTX_start(ctx);
+    A = BN_CTX_get(ctx);
+    B = BN_CTX_get(ctx);
+    X = BN_CTX_get(ctx);
+    D = BN_CTX_get(ctx);
+    M = BN_CTX_get(ctx);
+    Y = BN_CTX_get(ctx);
+    T = BN_CTX_get(ctx);
+    if (T == NULL)
+        goto err;
+
+    if (in == NULL)
+        R = BN_new();
+    else
+        R = in;
+    if (R == NULL)
+        goto err;
+
+    if (!BN_one(X))
+        goto err;
+    BN_zero(Y);
+    if (BN_copy(B, a) == NULL)
+        goto err;
+    if (BN_copy(A, n) == NULL)
+        goto err;
+    A->neg = 0;
+    if (B->neg || (BN_ucmp(B, A) >= 0)) {
+        if (!BN_nnmod(B, B, A, ctx))
+            goto err;
+    }
+    sign = -1;
+    /*-
+     * From  B = a mod |n|,  A = |n|  it follows that
+     *
+     *      0 <= B < A,
+     *     -sign*X*a  ==  B   (mod |n|),
+     *      sign*Y*a  ==  A   (mod |n|).
+     */
+
+    if (BN_is_odd(n) && (BN_num_bits(n) <= 2048)) {
+        /*
+         * Binary inversion algorithm; requires odd modulus. This is faster
+         * than the general algorithm if the modulus is sufficiently small
+         * (about 400 .. 500 bits on 32-bit systems, but much more on 64-bit
+         * systems)
+         */
+        int shift;
+
+        while (!BN_is_zero(B)) {
+            /*-
+             *      0 < B < |n|,
+             *      0 < A <= |n|,
+             * (1) -sign*X*a  ==  B   (mod |n|),
+             * (2)  sign*Y*a  ==  A   (mod |n|)
+             */
+
+            /*
+             * Now divide B by the maximum possible power of two in the
+             * integers, and divide X by the same value mod |n|. When we're
+             * done, (1) still holds.
+             */
+            shift = 0;
+            while (!BN_is_bit_set(B, shift)) { /* note that 0 < B */
+                shift++;
+
+                if (BN_is_odd(X)) {
+                    if (!BN_uadd(X, X, n))
+                        goto err;
+                }
+                /*
+                 * now X is even, so we can easily divide it by two
+                 */
+                if (!BN_rshift1(X, X))
+                    goto err;
+            }
+            if (shift > 0) {
+                if (!BN_rshift(B, B, shift))
+                    goto err;
+            }
+
+            /*
+             * Same for A and Y.  Afterwards, (2) still holds.
+             */
+            shift = 0;
+            while (!BN_is_bit_set(A, shift)) { /* note that 0 < A */
+                shift++;
+
+                if (BN_is_odd(Y)) {
+                    if (!BN_uadd(Y, Y, n))
+                        goto err;
+                }
+                /* now Y is even */
+                if (!BN_rshift1(Y, Y))
+                    goto err;
+            }
+            if (shift > 0) {
+                if (!BN_rshift(A, A, shift))
+                    goto err;
+            }
+
+            /*-
+             * We still have (1) and (2).
+             * Both  A  and  B  are odd.
+             * The following computations ensure that
+             *
+             *     0 <= B < |n|,
+             *      0 < A < |n|,
+             * (1) -sign*X*a  ==  B   (mod |n|),
+             * (2)  sign*Y*a  ==  A   (mod |n|),
+             *
+             * and that either  A  or  B  is even in the next iteration.
+             */
+            if (BN_ucmp(B, A) >= 0) {
+                /* -sign*(X + Y)*a == B - A  (mod |n|) */
+                if (!BN_uadd(X, X, Y))
+                    goto err;
+                /*
+                 * NB: we could use BN_mod_add_quick(X, X, Y, n), but that
+                 * actually makes the algorithm slower
+                 */
+                if (!BN_usub(B, B, A))
+                    goto err;
+            } else {
+                /*  sign*(X + Y)*a == A - B  (mod |n|) */
+                if (!BN_uadd(Y, Y, X))
+                    goto err;
+                /*
+                 * as above, BN_mod_add_quick(Y, Y, X, n) would slow things down
+                 */
+                if (!BN_usub(A, A, B))
+                    goto err;
+            }
+        }
+    } else {
+        /* general inversion algorithm */
+
+        while (!BN_is_zero(B)) {
+            BIGNUM *tmp;
+
+            /*-
+             *      0 < B < A,
+             * (*) -sign*X*a  ==  B   (mod |n|),
+             *      sign*Y*a  ==  A   (mod |n|)
+             */
+
+            /* (D, M) := (A/B, A%B) ... */
+            if (BN_num_bits(A) == BN_num_bits(B)) {
+                if (!BN_one(D))
+                    goto err;
+                if (!BN_sub(M, A, B))
+                    goto err;
+            } else if (BN_num_bits(A) == BN_num_bits(B) + 1) {
+                /* A/B is 1, 2, or 3 */
+                if (!BN_lshift1(T, B))
+                    goto err;
+                if (BN_ucmp(A, T) < 0) {
+                    /* A < 2*B, so D=1 */
+                    if (!BN_one(D))
+                        goto err;
+                    if (!BN_sub(M, A, B))
+                        goto err;
+                } else {
+                    /* A >= 2*B, so D=2 or D=3 */
+                    if (!BN_sub(M, A, T))
+                        goto err;
+                    if (!BN_add(D, T, B))
+                        goto err; /* use D (:= 3*B) as temp */
+                    if (BN_ucmp(A, D) < 0) {
+                        /* A < 3*B, so D=2 */
+                        if (!BN_set_word(D, 2))
+                            goto err;
+                        /*
+                         * M (= A - 2*B) already has the correct value
+                         */
+                    } else {
+                        /* only D=3 remains */
+                        if (!BN_set_word(D, 3))
+                            goto err;
+                        /*
+                         * currently M = A - 2*B, but we need M = A - 3*B
+                         */
+                        if (!BN_sub(M, M, B))
+                            goto err;
+                    }
+                }
+            } else {
+                if (!BN_div(D, M, A, B, ctx))
+                    goto err;
+            }
+
+            /*-
+             * Now
+             *      A = D*B + M;
+             * thus we have
+             * (**)  sign*Y*a  ==  D*B + M   (mod |n|).
+             */
+
+            tmp = A;    /* keep the BIGNUM object, the value does not matter */
+
+            /* (A, B) := (B, A mod B) ... */
+            A = B;
+            B = M;
+            /* ... so we have  0 <= B < A  again */
+
+            /*-
+             * Since the former  M  is now  B  and the former  B  is now  A,
+             * (**) translates into
+             *       sign*Y*a  ==  D*A + B    (mod |n|),
+             * i.e.
+             *       sign*Y*a - D*A  ==  B    (mod |n|).
+             * Similarly, (*) translates into
+             *      -sign*X*a  ==  A          (mod |n|).
+             *
+             * Thus,
+             *   sign*Y*a + D*sign*X*a  ==  B  (mod |n|),
+             * i.e.
+             *        sign*(Y + D*X)*a  ==  B  (mod |n|).
+             *
+             * So if we set  (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at
+             *      -sign*X*a  ==  B   (mod |n|),
+             *       sign*Y*a  ==  A   (mod |n|).
+             * Note that  X  and  Y  stay non-negative all the time.
+             */
+
+            /*
+             * most of the time D is very small, so we can optimize tmp := D*X+Y
+             */
+            if (BN_is_one(D)) {
+                if (!BN_add(tmp, X, Y))
+                    goto err;
+            } else {
+                if (BN_is_word(D, 2)) {
+                    if (!BN_lshift1(tmp, X))
+                        goto err;
+                } else if (BN_is_word(D, 4)) {
+                    if (!BN_lshift(tmp, X, 2))
+                        goto err;
+                } else if (D->top == 1) {
+                    if (!BN_copy(tmp, X))
+                        goto err;
+                    if (!BN_mul_word(tmp, D->d[0]))
+                        goto err;
+                } else {
+                    if (!BN_mul(tmp, D, X, ctx))
+                        goto err;
+                }
+                if (!BN_add(tmp, tmp, Y))
+                    goto err;
+            }
+
+            M = Y;      /* keep the BIGNUM object, the value does not matter */
+            Y = X;
+            X = tmp;
+            sign = -sign;
+        }
+    }
+
+    /*-
+     * The while loop (Euclid's algorithm) ends when
+     *      A == gcd(a,n);
+     * we have
+     *       sign*Y*a  ==  A  (mod |n|),
+     * where  Y  is non-negative.
+     */
+
+    if (sign < 0) {
+        if (!BN_sub(Y, n, Y))
+            goto err;
+    }
+    /* Now  Y*a  ==  A  (mod |n|).  */
+
+    if (BN_is_one(A)) {
+        /* Y*a == 1  (mod |n|) */
+        if (!Y->neg && BN_ucmp(Y, n) < 0) {
+            if (!BN_copy(R, Y))
+                goto err;
+        } else {
+            if (!BN_nnmod(R, Y, n, ctx))
+                goto err;
+        }
+    } else {
+        *pnoinv = 1;
+        goto err;
+    }
+    ret = R;
+ err:
+    if ((ret == NULL) && (in == NULL))
+       // BN_free(R);
+        return 0;
+    BN_CTX_end(ctx);
+    bn_check_top(ret);
+    return ret;
+}
+
+/*
+ * Set the sign of |a|: negative when |b| is non-zero, non-negative
+ * otherwise.  A zero value is never marked negative.
+ */
+void BN_set_negative(BIGNUM *a, int b)
+{
+    a->neg = (b != 0 && !BN_is_zero(a)) ? 1 : 0;
+}
+
+
+/*
+ * Public wrapper around int_bn_mod_inverse(): compute the inverse of |a|
+ * modulo |n|.
+ *
+ * in:  forwarded unchanged to int_bn_mod_inverse() (optional destination).
+ * ctx: scratch context; when NULL a temporary one is created with
+ *      BN_CTX_new_ex(NULL).
+ *
+ * Returns the BIGNUM returned by int_bn_mod_inverse(), or NULL when the
+ * context cannot be created, the helper fails, or no inverse exists.
+ * Failures that used to raise an OpenSSL error trip assert(0) in this port.
+ */
+BIGNUM *BN_mod_inverse(BIGNUM *in,
+                       const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx)
+{
+    BN_CTX *new_ctx = NULL;
+    BIGNUM *rv;
+    int noinv = 0;
+
+    if (ctx == NULL) {
+        ctx = new_ctx = BN_CTX_new_ex(NULL);
+        if (ctx == NULL) {
+            //ERR_raise(ERR_LIB_BN, ERR_R_BN_LIB);
+            assert(0);
+            return NULL;
+        }
+    }
+
+    rv = int_bn_mod_inverse(in, a, n, ctx, &noinv);
+    if (noinv) {
+        /*
+         * Braces are essential here: without them only the assert was
+         * guarded and the function returned NULL on every call.
+         */
+        // ERR_raise(ERR_LIB_BN, BN_R_NO_INVERSE);
+        assert(0);
+        rv = NULL;
+    }
+
+    /*
+     * TODO: a context created above (new_ctx) is leaked; the matching
+     * BN_CTX_free(new_ctx) call is disabled in this port.
+     */
+    (void)new_ctx;
+    return rv;
+}
+
+
+
+/*
+ * Subtract the single word |w| from |a| in place.
+ *
+ * Returns 1 on success, or the (possibly zero) result of the helper that
+ * performed the work in the zero / negative cases.
+ */
+int BN_sub_word(BIGNUM *a, BN_ULONG w)
+{
+    int idx;
+    int status;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    /* Subtracting zero changes nothing. */
+    if (w == 0)
+        return 1;
+
+    /* 0 - w == -w */
+    if (BN_is_zero(a)) {
+        status = BN_set_word(a, w);
+        if (status != 0)
+            BN_set_negative(a, 1);
+        return status;
+    }
+
+    /* -|a| - w == -(|a| + w): add on the magnitude, then restore the sign. */
+    if (a->neg) {
+        a->neg = 0;
+        status = BN_add_word(a, w);
+        a->neg = 1;
+        return status;
+    }
+
+    /* Single-word value smaller than w: the result flips sign. */
+    if (a->top == 1 && a->d[0] < w) {
+        a->d[0] = w - a->d[0];
+        a->neg = 1;
+        return 1;
+    }
+
+    /* Propagate the borrow upwards until a word can absorb it. */
+    idx = 0;
+    while (a->d[idx] < w) {
+        a->d[idx] = (a->d[idx] - w) & BN_MASK2;
+        w = 1;
+        idx++;
+    }
+    a->d[idx] -= w;
+
+    /* Drop the top word if the subtraction cleared it. */
+    if (idx == a->top - 1 && a->d[idx] == 0)
+        a->top--;
+    bn_check_top(a);
+    return 1;
+}
+
+/*
+ * Add the single word |w| to |a| in place.
+ *
+ * Returns 1 on success and 0 if the value had to grow and bn_wexpand()
+ * failed; the zero / negative cases return the helper's result.
+ */
+int BN_add_word(BIGNUM *a, BN_ULONG w)
+{
+    BN_ULONG sum;
+    int idx;
+    int status;
+
+    bn_check_top(a);
+    w &= BN_MASK2;
+
+    /* Adding zero changes nothing. */
+    if (w == 0)
+        return 1;
+
+    /* 0 + w == w */
+    if (BN_is_zero(a))
+        return BN_set_word(a, w);
+
+    /* -|a| + w == -(|a| - w): subtract on the magnitude, then negate. */
+    if (a->neg) {
+        a->neg = 0;
+        status = BN_sub_word(a, w);
+        if (!BN_is_zero(a))
+            a->neg = !(a->neg);
+        return status;
+    }
+
+    /* Ripple the carry through the existing words. */
+    idx = 0;
+    while (w != 0 && idx < a->top) {
+        sum = (a->d[idx] + w) & BN_MASK2;
+        a->d[idx] = sum;
+        w = (w > sum) ? 1 : 0;  /* wrapped around => carry out */
+        idx++;
+    }
+
+    /* A carry out of the top word needs one more word. */
+    if (w != 0 && idx == a->top) {
+        if (bn_wexpand(a, a->top + 1) == NULL)
+            return 0;
+        a->top++;
+        a->d[idx] = w;
+    }
+    bn_check_top(a);
+    return 1;
+}
+
+
+
+/*
+ * Initialise |mont| for Montgomery arithmetic modulo |mod|.
+ *
+ * The function copies |mod| into mont->N (with the sign cleared), sets
+ * mont->ri, fills mont->n0[] (MONT_WORD builds) or mont->Ni (otherwise),
+ * and finally stores 2^(2*ri) mod N in mont->RR, zero-padded to N.top
+ * words.
+ *
+ * ctx: supplies the temporary Ri and is passed on to the helpers called.
+ *
+ * Returns 1 on success, 0 if |mod| is zero or any step fails.
+ */
+int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
+{
+    int i, ret = 0;
+    BIGNUM *Ri, *R;
+
+    if (BN_is_zero(mod))
+        return 0;
+
+    BN_CTX_start(ctx);
+    if ((Ri = BN_CTX_get(ctx)) == NULL)
+        goto err;
+    R = &(mont->RR);            /* grab RR as a temp */
+    if (!BN_copy(&(mont->N), mod))
+        goto err;               /* Set N */
+    if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+        BN_set_flags(&(mont->N), BN_FLG_CONSTTIME);
+    mont->N.neg = 0;
+
+#ifdef MONT_WORD
+    {
+        /* tmod is a stack BIGNUM backed by buf[]: the low word(s) of mod */
+        BIGNUM tmod;
+        BN_ULONG buf[2];
+
+        bn_init(&tmod);
+        tmod.d = buf;
+        tmod.dmax = 2;
+        tmod.neg = 0;
+
+        if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
+            BN_set_flags(&tmod, BN_FLG_CONSTTIME);
+
+        /* bit length of mod rounded up to a whole number of words */
+        mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
+
+# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+        /*
+         * Only certain BN_BITS2<=32 platforms actually make use of n0[1],
+         * and we could use the #else case (with a shorter R value) for the
+         * others.  However, currently only the assembler files do know which
+         * is which.
+         */
+
+        BN_zero(R);
+        if (!(BN_set_bit(R, 2 * BN_BITS2)))
+            goto err;
+
+        /* tmod = mod reduced to its two least significant words */
+        tmod.top = 0;
+        if ((buf[0] = mod->d[0]))
+            tmod.top = 1;
+        if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
+            tmod.top = 2;
+
+        if (BN_is_one(&tmod))
+            BN_zero(Ri);
+        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
+            goto err;           /* R*Ri */
+        if (!BN_is_zero(Ri)) {
+            if (!BN_sub_word(Ri, 1))
+                goto err;
+        } else {                /* if N mod word size == 1 */
+
+            if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
+                goto err;
+            /* Ri-- (mod double word size) */
+            Ri->neg = 0;
+            Ri->d[0] = BN_MASK2;
+            Ri->d[1] = BN_MASK2;
+            Ri->top = 2;
+        }
+        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
+            goto err;
+        /*
+         * Ni = (R*Ri-1)/N, keep only couple of least significant words:
+         */
+        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+        mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
+# else
+        BN_zero(R);
+        if (!(BN_set_bit(R, BN_BITS2)))
+            goto err;           /* R */
+
+        buf[0] = mod->d[0];     /* tmod = N mod word size */
+        buf[1] = 0;
+        tmod.top = buf[0] != 0 ? 1 : 0;
+        /* Ri = R^-1 mod N */
+        if (BN_is_one(&tmod))
+            BN_zero(Ri);
+        else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, BN_BITS2))
+            goto err;           /* R*Ri */
+        if (!BN_is_zero(Ri)) {
+            if (!BN_sub_word(Ri, 1))
+                goto err;
+        } else {                /* if N mod word size == 1 */
+
+            if (!BN_set_word(Ri, BN_MASK2))
+                goto err;       /* Ri-- (mod word size) */
+        }
+        if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
+            goto err;
+        /*
+         * Ni = (R*Ri-1)/N, keep only least significant word:
+         */
+        mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
+        mont->n0[1] = 0;
+# endif
+    }
+#else                           /* !MONT_WORD */
+    {                           /* bignum version */
+        mont->ri = BN_num_bits(&mont->N);
+        BN_zero(R);
+        if (!BN_set_bit(R, mont->ri))
+            goto err;           /* R = 2^ri */
+        /* Ri = R^-1 mod N */
+        if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
+            goto err;
+        if (!BN_lshift(Ri, Ri, mont->ri))
+            goto err;           /* R*Ri */
+        if (!BN_sub_word(Ri, 1))
+            goto err;
+        /*
+         * Ni = (R*Ri-1) / N
+         */
+        if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
+            goto err;
+    }
+#endif
+
+    /* setup RR for conversions */
+    BN_zero(&(mont->RR));
+    if (!BN_set_bit(&(mont->RR), mont->ri * 2))
+        goto err;
+    if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
+        goto err;
+
+    /*
+     * Zero-pad RR up to N.top words; 'ret' is borrowed as the word count
+     * here and is overwritten with the real status just below.
+     */
+    for (i = mont->RR.top, ret = mont->N.top; i < ret; i++)
+        mont->RR.d[i] = 0;
+    mont->RR.top = ret;
+    mont->RR.flags |= BN_FLG_FIXED_TOP;
+
+    ret = 1;
+ err:
+    /* reached on success too: closes the frame opened by BN_CTX_start() */
+    BN_CTX_end(ctx);
+    return ret;
+}
+
+
+
+// Finally: the modular exponentiation entry point.
+
+
+/*
+ * Compute rr = a^p mod m with Montgomery multiplication and a sliding
+ * window over the exponent bits.
+ *
+ * rr:      receives the result.
+ * a, p, m: base, exponent and modulus; |m| must be odd (an even modulus
+ *          trips assert(0) and returns 0).
+ * ctx:     scratch context for temporaries and the window table.
+ * in_mont: optional Montgomery context for |m|; when NULL one is created
+ *          with BN_MONT_CTX_new() and initialised with BN_MONT_CTX_set().
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
+                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
+{
+    int i, j, bits, ret = 0, wstart, wend, window;
+    int start = 1;
+    BIGNUM *d, *r;
+    const BIGNUM *aa;
+    /* Table of variables obtained from 'ctx' */
+    BIGNUM *val[TABLE_SIZE];
+    BN_MONT_CTX *mont = NULL;
+
+    bn_check_top(a);
+    bn_check_top(p);
+    bn_check_top(m);
+
+    if (!BN_is_odd(m)) {
+     //   ERR_raise(ERR_LIB_BN, BN_R_CALLED_WITH_EVEN_MODULUS);
+        assert(0);
+        return 0;
+    }
+
+    /*
+     * The dispatch to the constant-time implementation is disabled in this
+     * port:
+     *
+    if (m->top <= BN_CONSTTIME_SIZE_LIMIT
+        && (BN_get_flags(p, BN_FLG_CONSTTIME) != 0
+            || BN_get_flags(a, BN_FLG_CONSTTIME) != 0
+            || BN_get_flags(m, BN_FLG_CONSTTIME) != 0)) {
+        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
+    }
+     */
+    bits = BN_num_bits(p);
+    if (bits == 0) {
+        /* x**0 mod 1, or x**0 mod -1 is still zero. */
+        if (BN_abs_is_word(m, 1)) {
+            ret = 1;
+            BN_zero(rr);
+        } else {
+            ret = BN_one(rr);
+        }
+        return ret;
+    }
+
+    BN_CTX_start(ctx);
+    d = BN_CTX_get(ctx);
+    r = BN_CTX_get(ctx);
+    val[0] = BN_CTX_get(ctx);
+    if (val[0] == NULL)
+        goto err;
+
+    /*
+     * If this is not done, things will break in the montgomery part
+     */
+
+    if (in_mont != NULL)
+        mont = in_mont;
+    else {
+        if ((mont = BN_MONT_CTX_new()) == NULL)
+            goto err;
+        if (!BN_MONT_CTX_set(mont, m, ctx))
+            goto err;
+    }
+
+    /* Reduce the base into [0, m) if necessary before converting it. */
+    if (a->neg || BN_ucmp(a, m) >= 0) {
+        if (!BN_nnmod(val[0], a, m, ctx))
+            goto err;
+        aa = val[0];
+    } else
+        aa = a;
+    if (!bn_to_mont_fixed_top(val[0], aa, mont, ctx))
+        goto err;               /* 1 */
+
+    /* Precompute the odd powers a^1, a^3, a^5, ... for the window. */
+    window = BN_window_bits_for_exponent_size(bits);
+    if (window > 1) {
+        if (!bn_mul_mont_fixed_top(d, val[0], val[0], mont, ctx))
+            goto err;           /* 2 */
+        j = 1 << (window - 1);
+        for (i = 1; i < j; i++) {
+            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
+                !bn_mul_mont_fixed_top(val[i], val[i - 1], d, mont, ctx))
+                goto err;
+        }
+    }
+
+    start = 1;                  /* This is used to avoid multiplication etc
+                                 * when there is only the value '1' in the
+                                 * buffer. */
+    wstart = bits - 1;          /* The top bit of the window */
+    wend = 0;                   /* The bottom bit of the window */
+
+#if 1                           /* by Shay Gueron's suggestion */
+    j = m->top;                 /* borrow j */
+    if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
+        if (bn_wexpand(r, j) == NULL)
+            goto err;
+        /* 2^(top*BN_BITS2) - m */
+        r->d[0] = (0 - m->d[0]) & BN_MASK2;
+        for (i = 1; i < j; i++)
+            r->d[i] = (~m->d[i]) & BN_MASK2;
+        r->top = j;
+        r->flags |= BN_FLG_FIXED_TOP;
+    } else
+#endif
+    if (!bn_to_mont_fixed_top(r, BN_value_one(), mont, ctx))
+        goto err;
+    for (;;) {
+        int wvalue;             /* The 'value' of the window */
+
+        if (BN_is_bit_set(p, wstart) == 0) {
+            if (!start) {
+                if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx))
+                    goto err;
+            }
+            if (wstart == 0)
+                break;
+            wstart--;
+            continue;
+        }
+        /*
+         * We now have wstart on a 'set' bit, we now need to work out how big
+         * a window to do.  To do this we need to scan forward until the last
+         * set bit before the end of the window
+         */
+        wvalue = 1;
+        wend = 0;
+        for (i = 1; i < window; i++) {
+            if (wstart - i < 0)
+                break;
+            if (BN_is_bit_set(p, wstart - i)) {
+                wvalue <<= (i - wend);
+                wvalue |= 1;
+                wend = i;
+            }
+        }
+
+        /* wend is the size of the current window */
+        j = wend + 1;
+        /* add the 'bytes above' */
+        if (!start)
+            for (i = 0; i < j; i++) {
+                if (!bn_mul_mont_fixed_top(r, r, r, mont, ctx))
+                    goto err;
+            }
+
+        /* wvalue will be an odd number < 2^window */
+        if (!bn_mul_mont_fixed_top(r, r, val[wvalue >> 1], mont, ctx))
+            goto err;
+
+        /* move the 'window' down further */
+        wstart -= wend + 1;
+        start = 0;
+        if (wstart < 0)
+            break;
+    }
+    /*
+     * Done with zero-padded intermediate BIGNUMs. Final BN_from_montgomery
+     * removes padding [if any] and makes return value suitable for public
+     * API consumer.
+     */
+#if defined(SPARC_T4_MONT)
+    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
+        j = mont->N.top;        /* borrow j */
+        val[0]->d[0] = 1;       /* borrow val[0] */
+        for (i = 1; i < j; i++)
+            val[0]->d[i] = 0;
+        val[0]->top = j;
+        if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
+            goto err;
+    } else
+#endif
+    if (!BN_from_montgomery(rr, r, mont, ctx))
+        goto err;
+    ret = 1;
+ err:
+    /*
+     * TODO: when in_mont == NULL the context created above is leaked; the
+     * BN_MONT_CTX_free(mont) call is disabled in this port.  The former
+     * "if (in_mont == NULL) return 0;" that was left behind made every such
+     * call report failure and skip BN_CTX_end(), so it has been removed.
+     */
+    BN_CTX_end(ctx);
+    bn_check_top(rr);
+    return ret;
+}
+
+/*
+ * Return non-zero when character |c| belongs to any of the classes selected
+ * by |mask| (CTYPE_MASK_* bits), according to ctype_char_map.  Characters
+ * outside the table's range never match.
+ */
+int ossl_ctype_check(int c, unsigned int mask)
+{
+    const int ascii = ossl_toascii(c);
+    const int table_len = sizeof(ctype_char_map) / sizeof(ctype_char_map[0]);
+
+    if (ascii < 0 || ascii >= table_len)
+        return 0;
+    return (ctype_char_map[ascii] & mask) != 0;
+}
+
+/*
+ * Convert one hexadecimal digit character (either case) to its value.
+ *
+ * Returns 0..15 for a valid digit and -1 for any other character.
+ */
+int OPENSSL_hexchar2int(unsigned char c)
+{
+    static const char lower_digits[] = "0123456789abcdef";
+    static const char upper_digits[] = "0123456789ABCDEF";
+    int value;
+
+#ifdef CHARSET_EBCDIC
+    c = os_toebcdic[c];
+#endif
+
+    /* The index of the matching character is the digit's value. */
+    for (value = 0; value < 16; value++) {
+        if (c == lower_digits[value] || c == upper_digits[value])
+            return value;
+    }
+    return -1;
+}
+
+
+/*
+ * Parse a hexadecimal string (optionally prefixed with '-') into a BIGNUM.
+ *
+ * bn: when NULL only the length is computed; when *bn is NULL a new BIGNUM
+ *     is created with BN_new() and stored in *bn on success; otherwise *bn
+ *     is reused (it must not carry BN_FLG_STATIC_DATA).
+ * a:  the text; parsing stops at the first non-hex character.
+ *
+ * Returns the number of characters consumed (digits plus the sign), or 0
+ * on error.
+ */
+int BN_hex2bn(BIGNUM **bn, const char *a)
+{
+    const int digits_per_word = BN_BYTES * 2;
+    BIGNUM *ret = NULL;
+    BN_ULONG word;
+    int negative = 0;
+    int ndigits = 0;
+    int nwords = 0;
+    int consumed, remaining, chunk, pos, nibble;
+
+    if (a == NULL || *a == '\0')
+        return 0;
+
+    if (*a == '-') {
+        negative = 1;
+        a++;
+    }
+
+    /* Count the leading run of hex digits (bounded so that *4 fits an int). */
+    while (ndigits <= INT_MAX / 4 && ossl_isxdigit(a[ndigits]))
+        ndigits++;
+
+    if (ndigits == 0 || ndigits > INT_MAX / 4)
+        return 0;
+
+    consumed = ndigits + negative;
+    if (bn == NULL)
+        return consumed;
+
+    /* a is the start of the hex digits, and it is 'ndigits' long */
+    if (*bn != NULL) {
+        ret = *bn;
+        if (BN_get_flags(ret, BN_FLG_STATIC_DATA)) {
+          //  ERR_raise(ERR_LIB_BN, ERR_R_PASSED_INVALID_ARGUMENT);
+            assert(0);
+            return 0;
+        }
+        BN_zero(ret);
+    } else {
+        ret = BN_new();
+        if (ret == NULL)
+            return 0;
+    }
+
+    /*
+     * Each hex digit is four bits.  On failure a BIGNUM created above is
+     * not released (the BN_free() call is disabled in this port).
+     */
+    if (bn_expand(ret, ndigits * 4) == NULL)
+        return 0;
+
+    /*
+     * Walk the digits from the least significant end, one word's worth at
+     * a time; only the most significant chunk may be shorter.
+     */
+    for (remaining = ndigits; remaining > 0; remaining -= digits_per_word) {
+        chunk = (digits_per_word <= remaining) ? digits_per_word : remaining;
+        word = 0;
+        for (pos = remaining - chunk; pos < remaining; pos++) {
+            nibble = OPENSSL_hexchar2int(a[pos]);
+            if (nibble < 0)
+                nibble = 0;     /* paranoia */
+            word = (word << 4) | nibble;
+        }
+        ret->d[nwords++] = word;
+    }
+    ret->top = nwords;
+    bn_correct_top(ret);
+
+    *bn = ret;
+    bn_check_top(ret);
+    /* Don't set the negative flag if it's zero. */
+    if (ret->top != 0)
+        ret->neg = negative;
+    return consumed;
+}
+
+
+
+// USAGE?
+
+//int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx)
+
+//BN_mod_exp_mont(r, a, p, m, ctx, NULL);
+
+/*
+ * Smoke test: parse the hex strings |aa|, |pp| and |mm| and run
+ * BN_mod_exp_mont() on them (aa^pp mod mm).  The result is computed but not
+ * reported.
+ *
+ * Nothing allocated here is released: the matching free routines are not
+ * used anywhere in this port.
+ */
+void test(char * aa, char *pp, char *mm) {
+
+    OSSL_LIB_CTX ossl; // should be initialised
+
+    /*
+     * Keep the context by pointer: dereferencing the result unchecked would
+     * crash on allocation failure, and copying it by value detaches the
+     * working copy from the allocated object.
+     */
+    BN_CTX *ctx = BN_CTX_new_ex(&ossl);
+
+    BIGNUM *a = BN_new();
+    BIGNUM *p = BN_new();
+    BIGNUM *m = BN_new();
+
+    BIGNUM *r = BN_new();
+
+    if (ctx == NULL || a == NULL || p == NULL || m == NULL || r == NULL)
+        return;
+
+    /* BN_hex2bn() returns 0 when the text cannot be parsed. */
+    if (!BN_hex2bn(&a, aa) || !BN_hex2bn(&p, pp) || !BN_hex2bn(&m, mm))
+        return;
+
+    (void)BN_mod_exp_mont(r, a, p, m, ctx, NULL);
+
+    return;
+
+}
+
+
diff --git a/openssl/openssl-test.h b/openssl/openssl-test.h
@@ -0,0 +1,531 @@
+//
+//  openssl-test.h
+//  hello
+//
+//  Created by Cedric Zwahlen on 07.10.2023.
+//
+
+#ifndef openssl_test_h
+#define openssl_test_h
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Number of BIGNUMs stored in each BN_POOL_ITEM ("pool item"). */
+#define BN_CTX_POOL_SIZE        16
+/* The stack-frame array grows on demand; this is its first-time size. */
+#define BN_CTX_START_FRAMES     32
+
+
+
+/*
+ * Word-size selection: exactly one of SIXTY_FOUR_BIT / THIRTY_TWO_BIT is
+ * expected to be defined.  It fixes BN_ULONG (the type of one bignum word)
+ * and BN_BYTES (its size in bytes).
+ */
+#define SIXTY_FOUR_BIT // on gpu that would be 32
+
+# ifdef SIXTY_FOUR_BIT
+#  define BN_ULONG        unsigned long long
+#  define BN_BYTES        8
+# endif
+
+# ifdef THIRTY_TWO_BIT
+#  define BN_ULONG        unsigned int
+#  define BN_BYTES        4
+# endif
+
+
+/* NOTE(review): presumably the number of ex_data classes - confirm. */
+# define CRYPTO_EX_INDEX__COUNT          16
+
+
+/*
+ * Arbitrary-precision integer: a sign flag plus a little-endian array of
+ * BN_ULONG words.
+ */
+typedef struct bignum_st BIGNUM;
+
+struct bignum_st {
+    BN_ULONG *d;                /*
+                                 * Pointer to an array of 'BN_BITS2' bit
+                                 * chunks. These chunks are organised in
+                                 * a least significant chunk first order.
+                                 */
+    int top;                    /* Index of last used d +1. */
+    /* The next are internal book keeping for bn_expand. */
+    int dmax;                   /* Size of the d array. */
+    int neg;                    /* one if the number is negative */
+    int flags;                  /* BN_FLG_* bits */
+};
+
+
+
+
+
+/*
+ * Minimal "ex_data" declarations; crypto_ex_data_st and ossl_lib_ctx_st
+ * are only forward-declared at this point.
+ */
+typedef struct crypto_ex_data_st CRYPTO_EX_DATA;
+
+typedef struct ex_callback_st EX_CALLBACK;
+
+/* Locks are opaque in this port. */
+typedef void CRYPTO_RWLOCK;
+
+/* Callback signatures stored in an EX_CALLBACK. */
+typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                           int idx, long argl, void *argp);
+typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                             int idx, long argl, void *argp);
+typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from,
+                           void *from_d, int idx, long argl, void *argp);
+/* __owur */ int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp,
+                            CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func,
+                            CRYPTO_EX_free *free_func);
+
+
+
+/* Library context handle; the structure is not defined in this part. */
+typedef struct ossl_lib_ctx_st OSSL_LIB_CTX;
+
+
+
+
+/* One registered set of ex_data callbacks with its user arguments. */
+struct ex_callback_st {
+    long argl;                  /* Arbitrary long */
+    void *argp;                 /* Arbitrary void * */
+    int priority;               /* Priority ordering for freeing */
+    CRYPTO_EX_new *new_func;
+    CRYPTO_EX_free *free_func;
+    CRYPTO_EX_dup *dup_func;
+};
+
+
+/*
+ * Container for EX_CALLBACK entries.  The typed stack used originally is
+ * replaced by an untyped pointer.
+ */
+typedef struct ex_callbacks_st {
+  //  STACK_OF(EX_CALLBACK) *meth;
+    
+    void * meth;
+} EX_CALLBACKS;
+
+
+
+/*
+ * One node of the BN_POOL doubly linked list: a fixed bundle of
+ * BN_CTX_POOL_SIZE BIGNUMs.
+ */
+typedef struct bignum_pool_item {
+    /* The bignum values */
+    BIGNUM vals[BN_CTX_POOL_SIZE];
+    /* Linked-list admin */
+    struct bignum_pool_item *prev, *next;
+} BN_POOL_ITEM;
+
+
+/*
+ * Stack of unsigned indexes used by BN_CTX for its "stack frames".
+ */
+typedef struct bignum_ctx_stack {
+    /* Array of indexes into the bignum stack */
+    unsigned int *indexes;
+    /* Number of stack frames, and the size of the allocated array */
+    unsigned int depth, size;
+} BN_STACK;
+/*
+ * NOTE(review): these helpers are declared static in a header, so every
+ * file including it gets its own declaration - confirm that only the
+ * defining source file needs them.
+ */
+static void BN_STACK_init(BN_STACK *);
+static void BN_STACK_finish(BN_STACK *);
+static int BN_STACK_push(BN_STACK *, unsigned int);
+static unsigned int BN_STACK_pop(BN_STACK *);
+
+
+
+
+/*
+ * Pool of BIGNUMs kept as a linked list of BN_POOL_ITEM bundles.
+ */
+typedef struct bignum_pool {
+    /* Linked-list admin */
+    BN_POOL_ITEM *head, *current, *tail;
+    /* Stack depth and allocation size */
+    unsigned used, size;
+} BN_POOL;
+/*
+ * NOTE(review): static declarations in a header, as for BN_STACK - confirm
+ * that only the defining source file needs them.
+ */
+static void BN_POOL_init(BN_POOL *);
+static void BN_POOL_finish(BN_POOL *);
+static BIGNUM *BN_POOL_get(BN_POOL *, int);
+static void BN_POOL_release(BN_POOL *, unsigned int);
+
+
+
+/*
+ * Scratch context handing out temporary BIGNUMs: a pool of values plus a
+ * stack of frame markers, with bookkeeping for error states.
+ */
+typedef struct bignum_ctx BN_CTX;
+
+struct bignum_ctx {
+    /* The bignum bundles */
+    BN_POOL pool;
+    /* The "stack frames", if you will */
+    BN_STACK stack;
+    /* The number of bignums currently assigned */
+    unsigned int used;
+    /* Depth of stack overflow */
+    int err_stack;
+    /* Block "gets" until an "end" (compatibility behaviour) */
+    int too_many;
+    /* Flags. */
+    int flags;
+    /* The library context */
+    OSSL_LIB_CTX *libctx;
+};
+
+
+/*
+ * Debug hooks are compiled out: BN_FLG_FIXED_TOP is 0 (so OR-ing it into
+ * flags is a no-op) and the bn_check_* / bn_pollute macros expand to
+ * nothing.
+ */
+#  define BN_FLG_FIXED_TOP 0
+#  define bn_pollute(a)
+#  define bn_check_top(a)
+#  define bn_fix_top(a)           bn_correct_top(a)
+#  define bn_check_size(bn, bits)
+#  define bn_wcheck_size(bn, words)
+
+
+
+/* Word-count limit used by the (disabled) constant-time dispatch. */
+#define BN_CONSTTIME_SIZE_LIMIT (INT_MAX / BN_BYTES / 256)
+
+#define BN_FLG_CONSTTIME        0x04
+# define BN_FLG_SECURE           0x08
+
+/* Bits per word, bits per double word, and the top bit of a word. */
+# define BN_BITS2       (BN_BYTES * 8)
+# define BN_BITS        (BN_BITS2 * 2)
+# define BN_TBIT        ((BN_ULONG)1 << (BN_BITS2 - 1))
+
+/* All-ones mask of 64 bits; matches the SIXTY_FOUR_BIT word size only. */
+#  define BN_MASK2        (0xffffffffffffffffLL)
+
+/* Sets |a| to zero via BN_set_word(); evaluates to its return value. */
+#  define BN_zero(a)      (BN_set_word((a),0))
+
+#   define ossl_inline inline
+
+
+/*
+ * NOTE(review): INT_MAX is taken from the compiler-predefined __INT_MAX__
+ * instead of <limits.h>; confirm this is intended for all target compilers.
+ */
+#define INT_MAX   __INT_MAX__
+
+
+/* Repeated definition, identical to the one above. */
+# define BN_BITS2       (BN_BYTES * 8)
+
+
+/*
+ * Allocation shims mapping the OpenSSL allocator names onto the C library.
+ * OPENSSL_zalloc must return zero-filled memory, so it maps to calloc();
+ * plain malloc() would hand back uninitialised storage.
+ */
+#define OPENSSL_zalloc(num) calloc(1, (num))
+#define OPENSSL_malloc malloc
+#define OPENSSL_free free
+
+
+
+/*
+ * NOTE(review): expands to CRYPTO_clear_free() with OPENSSL_FILE and
+ * OPENSSL_LINE, none of which are defined in this part of the header -
+ * confirm they exist before using this macro.
+ */
+# define OPENSSL_clear_free(addr, num) \
+        CRYPTO_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE)
+
+
+
+/* Sets |a| to one via BN_set_word(); evaluates to its return value. */
+# define BN_one(a)       (BN_set_word((a),1))
+
+/* BIGNUM flag bits (stored in bignum_st.flags). */
+# define BN_FLG_MALLOCED         0x01
+# define BN_FLG_STATIC_DATA      0x02
+
+/*
+ * Half-word constants.  BN_BITS4 must be half the width of BN_ULONG and
+ * the masks must select the low half (BN_MASK2l), the high half (BN_MASK2h)
+ * and the high half plus the top bit of the low half (BN_MASK2h1).  The
+ * 16-bit values are only correct for a 32-bit BN_ULONG; with the 64-bit
+ * word they made LBITS/HBITS (and the multiply macros built on them) drop
+ * the upper bits.
+ */
+# ifdef THIRTY_TWO_BIT
+#  define BN_BITS4        16
+#  define BN_MASK2l       (0xffff)
+#  define BN_MASK2h1      (0xffff8000L)
+#  define BN_MASK2h       (0xffff0000L)
+# else
+#  define BN_BITS4        32
+#  define BN_MASK2l       (0xffffffffL)
+#  define BN_MASK2h1      (0xffffffff80000000LL)
+#  define BN_MASK2h       (0xffffffff00000000LL)
+# endif
+
+/* Low half, high half, and "move the low half into the high half". */
+#  define LBITS(a)        ((a)&BN_MASK2l)
+#  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
+#  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)
+
+/*
+ * Double-word variants.  NOTE(review): BN_MASKl and BN_ULLONG are not
+ * defined in this part of the header - confirm before use.
+ */
+#  define LLBITS(a)       ((a)&BN_MASKl)
+#  define LHBITS(a)       (((a)>>BN_BITS2)&BN_MASKl)
+#  define LL2HBITS(a)     ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
+
+/*
+ * mul64(l, h, bl, bh): on entry l/h hold the low/high halves of one word
+ * and bl/bh the halves of the other; on exit l holds the low word and h
+ * the high word of their product, built from the four half-word partial
+ * products.  Arguments are evaluated more than once.
+ */
+#  define mul64(l,h,bl,bh) \
+        { \
+        BN_ULONG m,m1,lt,ht; \
+ \
+        lt=l; \
+        ht=h; \
+        m =(bh)*(lt); \
+        lt=(bl)*(lt); \
+        m1=(bl)*(ht); \
+        ht =(bh)*(ht); \
+        m=(m+m1)&BN_MASK2; ht += L2HBITS((BN_ULONG)(m < m1)); \
+        ht+=HBITS(m); \
+        m1=L2HBITS(m); \
+        lt=(lt+m1)&BN_MASK2; ht += (lt < m1); \
+        (l)=lt; \
+        (h)=ht; \
+        }
+
+
+/*
+ * mul(r, a, bl, bh, c): multiply word a by the word whose halves are
+ * bl/bh, add carry c; r receives the low word, c the new carry.
+ */
+#  define mul(r,a,bl,bh,c) { \
+        BN_ULONG l,h; \
+ \
+        h= (a); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        mul64(l,h,(bl),(bh)); \
+ \
+        /* non-multiply part */ \
+        l+=(c); h += ((l&BN_MASK2) < (c)); \
+        (c)=h&BN_MASK2; \
+        (r)=l&BN_MASK2; \
+        }
+
+
+/*
+ * mul_add(r, a, bl, bh, c): like mul(), but also adds the previous value
+ * of r into the result (r += a*b + c, with c receiving the carry).
+ */
+#  define mul_add(r,a,bl,bh,c) { \
+        BN_ULONG l,h; \
+ \
+        h= (a); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        mul64(l,h,(bl),(bh)); \
+ \
+        /* non-multiply part */ \
+        l=(l+(c))&BN_MASK2; h += (l < (c)); \
+        (c)=(r); \
+        l=(l+(c))&BN_MASK2; h += (l < (c)); \
+        (c)=h&BN_MASK2; \
+        (r)=l; \
+        }
+
+/*
+ * sqr64(lo, ho, in): square the word |in|; lo/ho receive the low/high
+ * words of the result.
+ */
+#  define sqr64(lo,ho,in) \
+        { \
+        BN_ULONG l,h,m; \
+ \
+        h=(in); \
+        l=LBITS(h); \
+        h=HBITS(h); \
+        m =(l)*(h); \
+        l*=l; \
+        h*=h; \
+        h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
+        m =(m&BN_MASK2l)<<(BN_BITS4+1); \
+        l=(l+m)&BN_MASK2; h += (l < m); \
+        (lo)=l; \
+        (ho)=h; \
+        }
+
+
+/*
+ * Sliding-window width (in bits) chosen from the exponent's bit length b;
+ * the result is one of 1, 3, 4, 5 or 6.
+ */
+# define BN_window_bits_for_exponent_size(b) \
+                ((b) > 671 ? 6 : \
+                 (b) > 239 ? 5 : \
+                 (b) >  79 ? 4 : \
+                 (b) >  23 ? 3 : 1)
+
+/* Add / subtract a single word to / from |a| in place. */
+int BN_add_word(BIGNUM *a, BN_ULONG w);
+int BN_sub_word(BIGNUM *a, BN_ULONG w);
+
+/* Identity mapping: characters are passed through unchanged. */
+#  define ossl_toascii(c)       (c)
+
+/* Non-zero when c is a hexadecimal digit according to ossl_ctype_check(). */
+# define ossl_isxdigit(c)       (ossl_ctype_check((c), CTYPE_MASK_xdigit))
+
+/* maximum precomputation table size for *variable* sliding windows */
+#define TABLE_SIZE      32
+
+/* Remainder only: BN_div() with the quotient output set to NULL. */
+# define BN_mod(rem,m,d,ctx) BN_div(NULL,(rem),(m),(d),(ctx))
+
+/*
+ * Division prototype.  As used in this file, dv receives the quotient and
+ * rem the remainder of m / d; either output may be NULL.
+ */
+int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
+           BN_CTX *ctx);
+
+/* Character-class bits stored in ctype_char_map. */
+# define CTYPE_MASK_lower       0x1
+# define CTYPE_MASK_upper       0x2
+# define CTYPE_MASK_digit       0x4
+# define CTYPE_MASK_space       0x8
+# define CTYPE_MASK_xdigit      0x10
+# define CTYPE_MASK_blank       0x20
+# define CTYPE_MASK_cntrl       0x40
+# define CTYPE_MASK_graph       0x80
+# define CTYPE_MASK_print       0x100
+# define CTYPE_MASK_punct       0x200
+# define CTYPE_MASK_base64      0x400
+# define CTYPE_MASK_asn1print   0x800
+
+static const unsigned short ctype_char_map[128] = {
+   /* 00 nul */ CTYPE_MASK_cntrl,
+   /* 01 soh */ CTYPE_MASK_cntrl,
+   /* 02 stx */ CTYPE_MASK_cntrl,
+   /* 03 etx */ CTYPE_MASK_cntrl,
+   /* 04 eot */ CTYPE_MASK_cntrl,
+   /* 05 enq */ CTYPE_MASK_cntrl,
+   /* 06 ack */ CTYPE_MASK_cntrl,
+   /* 07 \a  */ CTYPE_MASK_cntrl,
+   /* 08 \b  */ CTYPE_MASK_cntrl,
+   /* 09 \t  */ CTYPE_MASK_blank | CTYPE_MASK_cntrl | CTYPE_MASK_space,
+   /* 0A \n  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
+   /* 0B \v  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
+   /* 0C \f  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
+   /* 0D \r  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
+   /* 0E so  */ CTYPE_MASK_cntrl,
+   /* 0F si  */ CTYPE_MASK_cntrl,
+   /* 10 dle */ CTYPE_MASK_cntrl,
+   /* 11 dc1 */ CTYPE_MASK_cntrl,
+   /* 12 dc2 */ CTYPE_MASK_cntrl,
+   /* 13 dc3 */ CTYPE_MASK_cntrl,
+   /* 14 dc4 */ CTYPE_MASK_cntrl,
+   /* 15 nak */ CTYPE_MASK_cntrl,
+   /* 16 syn */ CTYPE_MASK_cntrl,
+   /* 17 etb */ CTYPE_MASK_cntrl,
+   /* 18 can */ CTYPE_MASK_cntrl,
+   /* 19 em  */ CTYPE_MASK_cntrl,
+   /* 1A sub */ CTYPE_MASK_cntrl,
+   /* 1B esc */ CTYPE_MASK_cntrl,
+   /* 1C fs  */ CTYPE_MASK_cntrl,
+   /* 1D gs  */ CTYPE_MASK_cntrl,
+   /* 1E rs  */ CTYPE_MASK_cntrl,
+   /* 1F us  */ CTYPE_MASK_cntrl,
+   /* 20     */ CTYPE_MASK_blank | CTYPE_MASK_print | CTYPE_MASK_space
+                | CTYPE_MASK_asn1print,
+   /* 21  !  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 22  "  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 23  #  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 24  $  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 25  %  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 26  &  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 27  '  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 28  (  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 29  )  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 2A  *  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 2B  +  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 2C  ,  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 2D  -  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 2E  .  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 2F  /  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 30  0  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 31  1  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 32  2  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 33  3  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 34  4  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 35  5  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 36  6  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 37  7  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 38  8  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 39  9  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 3A  :  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 3B  ;  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 3C  <  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 3D  =  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 3E  >  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 3F  ?  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
+                | CTYPE_MASK_asn1print,
+   /* 40  @  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 41  A  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 42  B  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 43  C  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 44  D  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 45  E  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 46  F  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 47  G  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 48  H  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 49  I  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4A  J  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4B  K  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4C  L  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4D  M  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4E  N  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 4F  O  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 50  P  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 51  Q  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 52  R  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 53  S  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 54  T  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 55  U  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 56  V  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 57  W  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 58  X  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 59  Y  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 5A  Z  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 5B  [  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 5C  \  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 5D  ]  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 5E  ^  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 5F  _  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 60  `  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 61  a  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 62  b  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 63  c  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 64  d  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 65  e  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 66  f  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 67  g  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 68  h  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 69  i  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6A  j  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6B  k  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6C  l  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6D  m  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6E  n  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 6F  o  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 70  p  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 71  q  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 72  r  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 73  s  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 74  t  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 75  u  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 76  v  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 77  w  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 78  x  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 79  y  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 7A  z  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
+                | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
+   /* 7B  {  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 7C  |  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 7D  }  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 7E  ~  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
+   /* 7F del */ CTYPE_MASK_cntrl
+};
+
+
+
+// NOTE: maybe this should not be used.
+
+#define MONT_WORD
+//#define BN_MUL_COMBA
+
+
+
+void test(char * aa, char *pp, char *mm);
+
+#endif /* openssl_test_h */
+
diff --git a/openssl/readme.txt b/openssl/readme.txt
@@ -0,0 +1,24 @@
+This folder contains parts of the code from the OpenSSL project (version 3.2).
+
+https://github.com/openssl/openssl
+
+-----
+
+Using the program
+
+On Ubuntu, run these three commands:
+
+    cmake -S . -B ./build
+
+    cmake --build ./build --config Debug
+
+    ./build/openssltest
+
+Libgcrypt is required to run the program.
+
+-----
+
+Comments
+
+Although it compiles, the Montgomery multiplication algorithm that I copied from OpenSSL does not work as intended.
+I don't yet know why.
+\ No newline at end of file
diff --git a/other/.DS_Store b/other/.DS_Store
Binary files differ.
diff --git a/other/CMakeLists.txt b/other/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Build script for a minimal executable that is linked against OpenCL.
+cmake_minimum_required(VERSION 3.1) # 3.1 << C_STANDARD 11
+
+# The project name is reused below as the name of the executable target.
+project(HelloOpenCL LANGUAGES C)
+
+# Configuration fails if no OpenCL installation can be located.
+find_package(OpenCL REQUIRED)
+
+add_executable(${PROJECT_NAME} Main.c)
+
+target_link_libraries(${PROJECT_NAME} PRIVATE OpenCL::OpenCL)
+
+# Require C11 and disable compiler-specific language extensions.
+set_target_properties(${PROJECT_NAME} PROPERTIES C_STANDARD 11
+                                                 C_STANDARD_REQUIRED ON
+                                                 C_EXTENSIONS OFF)
+
+# Defines CL_TARGET_OPENCL_VERSION=100 for this target only.
+target_compile_definitions(${PROJECT_NAME} PRIVATE CL_TARGET_OPENCL_VERSION=100)
+\ No newline at end of file
diff --git a/other/Main.c b/other/Main.c
@@ -0,0 +1,20 @@
+// C standard includes
+#include <stdio.h>
+
+// OpenCL includes
+#include <CL/cl.h>
+
+/* Queries the number of available OpenCL platforms and prints either the
+   count or the error code returned by clGetPlatformIDs. Always exits with 0. */
+int main()
+{
+    cl_uint platformCount = 0;
+    cl_int status = clGetPlatformIDs( 0, NULL, &platformCount );
+
+    if (status != CL_SUCCESS)
+    {
+        /* Report the raw OpenCL error code */
+        printf("clGetPlatformIDs(%i)\n", status);
+        return 0;
+    }
+
+    printf("%u platform(s) found\n", platformCount);
+    return 0;
+}
diff --git a/other/readme.txt b/other/readme.txt
@@ -0,0 +1,31 @@
+A test of whether linking against OpenCL works.
+
+To compile manually:
+
+gcc -Wall -Wextra -D CL_TARGET_OPENCL_VERSION=100 Main.c -o HelloOpenCL -lOpenCL
+
+
+with cmake...
+
+cmake -S . -B ./build
+
+then...
+
+cmake --build ./build --config Release
+
+and to run it:
+
+./build/HelloOpenCL
+
+
+––––– 
+
+If only it were that easy. If you get the -1001 error and are running your system in a VM – bad luck.
+It seems to be difficult for VMs to facilitate two-way communication with GPUs, and I have not been able to solve this.
+
+Interestingly, at least to test whether it builds and runs on systems other than macOS, you can use
+
+sudo apt-get install libpocl2
+
+This allows OpenCL to run on the CPU.
+I believe that, if it is run natively, the program may be able to choose which cards to use.
+\ No newline at end of file
diff --git a/source/.DS_Store b/source/.DS_Store
Binary files differ.
diff --git a/source/big-int-test.c b/source/big-int-test.c
@@ -0,0 +1,1080 @@
+//
+//  bigNum_test.c
+//  hello
+//
+//  Created by Cedric Zwahlen on 25.09.23.
+//
+
+#include "big-int-test.h"
+
+
+
+// MARK: min functionality
+
+/*
+ *  Computes r = u mod v
+ *
+ *  r and v are multiprecision integers of vdigits digits,
+ *  u is a multiprecision integer of udigits digits.
+ *  r may overlap v.
+ *
+ *  Note that r here is only vdigits long,
+ *  whereas in mpDivide it is udigits long.
+ *
+ *  v is not const because mpDivide normalises it in place and
+ *  restores it before returning.
+ *
+ *  Works on fixed-size stack buffers of MAX_FIXED_DIGITS*2 digits, so
+ *  max(udigits, vdigits) must not exceed that size.
+ *
+ *  Always returns 0; the status returned by mpDivide is not propagated.
+ */
+int mpModulo(DIGIT_T r[], const DIGIT_T u[], size_t udigits,
+             DIGIT_T v[], size_t vdigits)
+{
+    size_t nn = max(udigits, vdigits);
+    /* [v2.6] increased to two times */
+    DIGIT_T qq[MAX_FIXED_DIGITS*2];
+    DIGIT_T rr[MAX_FIXED_DIGITS*2];
+
+    /* Refuse sizes that would overrun the fixed buffers */
+    assert(nn <= (MAX_FIXED_DIGITS*2));
+
+    /* mpDivide only clears udigits digits of rr, but vdigits digits are
+       copied out below: clear all nn digits so that no uninitialised
+       stack data can reach r when vdigits > udigits */
+    mpSetZero(rr, nn);
+
+    /* rr = u mod v (qq receives the quotient, which is discarded) */
+    mpDivide(qq, rr, u, udigits, v, vdigits);
+
+    /* Final r is only vdigits long */
+    mpSetEqual(r, rr, vdigits);
+
+    mpDESTROY(rr, nn);
+    mpDESTROY(qq, udigits);
+
+    return 0;
+}
+
+/*
+ *  Computes a = (x * y) mod m
+ *
+ *  a, x, y and m are multiprecision integers of ndigits digits each.
+ *  m is not const because it is handed to mpModulo, which takes a
+ *  non-const divisor.
+ *
+ *  The double-length product is kept in a fixed stack buffer of
+ *  MAX_FIXED_DIGITS * 2 digits, so ndigits must not exceed MAX_FIXED_DIGITS.
+ *
+ *  Always returns 0.
+ */
+int mpModMult(DIGIT_T a[], const DIGIT_T x[], const DIGIT_T y[],
+              DIGIT_T m[], size_t ndigits)
+{
+    /* Double-length temp variable p */
+    DIGIT_T p[MAX_FIXED_DIGITS * 2];
+
+    /* mpMultiply writes 2*ndigits digits into p: refuse anything larger */
+    assert(ndigits <= MAX_FIXED_DIGITS);
+
+    /* Calc p[2n] = x * y */
+    mpMultiply(p, x, y, ndigits);
+
+    /* Then modulo (NOTE: a is OK at only ndigits long) */
+    mpModulo(a, p, ndigits * 2, m, ndigits);
+
+    mpDESTROY(p, ndigits * 2);
+
+    return 0;
+}
+
+/*
+ *  Computes the product w = u * v
+ *  where u and v have ndigits digits each and w has 2*ndigits digits.
+ *  w is written from scratch and must not be the same array as u or v.
+ *  Always returns 0.
+ *
+ *  Ref: Knuth Vol 2 Ch 4.3.1 p 268 Algorithm M.
+ */
+int mpMultiply(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits)
+{
+    DIGIT_T carry, prod[2];
+    size_t row, col;
+
+    /* Clear the whole double-length result first */
+    for (col = 0; col < 2 * ndigits; col++)
+        w[col] = 0;
+
+    for (row = 0; row < ndigits; row++)
+    {
+        /* A zero multiplier digit contributes nothing to this row */
+        if (v[row] == 0)
+        {
+            w[row + ndigits] = 0;
+            continue;
+        }
+
+        carry = 0;
+        for (col = 0; col < ndigits; col++)
+        {
+            /* prod = u[col] * v[row] + w[col+row] + carry, as a two-digit value */
+            spMultiply(prod, u[col], v[row]);
+
+            prod[0] += carry;
+            if (prod[0] < carry)
+                prod[1]++;
+
+            prod[0] += w[col + row];
+            if (prod[0] < w[col + row])
+                prod[1]++;
+
+            w[col + row] = prod[0];
+            carry = prod[1];
+        }
+
+        /* The final carry of the row becomes the next higher digit */
+        w[row + ndigits] = carry;
+    }
+
+    return 0;
+}
+
+/*
+ *  Calculates w = u + v
+ *  where w, u and v are multiprecision integers of ndigits digits each.
+ *  Returns the carry out of the most significant digit (0 or 1).
+ *  w may be the same array as u; it must not be the same array as v.
+ *
+ *  Ref: Knuth Vol 2 Ch 4.3.1 p 266 Algorithm A.
+ */
+DIGIT_T mpAdd(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits)
+{
+    DIGIT_T carry = 0;
+    size_t pos;
+
+    for (pos = 0; pos < ndigits; pos++)
+    {
+        /* Add the incoming carry first; wrap-around means a new carry */
+        w[pos] = u[pos] + carry;
+        carry = (w[pos] < carry) ? 1 : 0;
+
+        /* Then add the second operand, again detecting wrap-around */
+        w[pos] += v[pos];
+        if (w[pos] < v[pos])
+            carry++;
+    }
+
+    return carry;
+}
+
+/*
+ *  Multiprecision division: q = u / v and r = u mod v.
+ *
+ *  q, r : outputs; both are cleared for udigits digits before any work
+ *  u    : dividend of udigits digits
+ *  v    : divisor of vdigits digits; shifted in place during the
+ *         computation and shifted back before the normal return
+ *
+ *  Returns -1 if v has no non-zero digit (divide by zero), otherwise 0.
+ */
+int mpDivide(DIGIT_T q[], DIGIT_T r[], const DIGIT_T u[],
+    size_t udigits, DIGIT_T v[], size_t vdigits)
+{    /*    Computes quotient q = u / v and remainder r = u mod v
+        where q, r, u are multiple precision digits
+        all of udigits and the divisor v is vdigits.
+
+        Ref: Knuth Vol 2 Ch 4.3.1 p 272 Algorithm D.
+
+        Do without extra storage space, i.e. use r[] for
+        normalised u[], unnormalise v[] at end, and cope with
+        extra digit Uj+n added to u after normalisation.
+
+        WARNING: this trashes q and r first, so cannot do
+        u = u / v or v = u mod v.
+        It also changes v temporarily so cannot make it const.
+    */
+    size_t shift;
+    int n, m, j;
+    DIGIT_T bitmask, overflow;
+    DIGIT_T qhat, rhat, t[2];
+    DIGIT_T *uu, *ww;
+    int qhatOK, cmp;
+
+    /* Clear q and r */
+    mpSetZero(q, udigits);
+    mpSetZero(r, udigits);
+
+    /* Work out exact sizes of u and v */
+    /* n = significant digits of v; m = significant digits of u minus n */
+    n = (int)mpSizeof(v, vdigits);
+    m = (int)mpSizeof(u, udigits);
+    m -= n;
+
+    /* Catch special cases */
+    if (n == 0)
+        return -1;    /* Error: divide by zero */
+
+    if (n == 1)
+    {    /* Use short division instead */
+        r[0] = mpShortDiv(q, u, v[0], udigits);
+        return 0;
+    }
+
+    if (m < 0)
+    {    /* v > u, so just set q = 0 and r = u */
+        mpSetEqual(r, u, udigits);
+        return 0;
+    }
+
+    if (m == 0)
+    {    /* u and v are the same length */
+        cmp = mpCompare(u, v, (size_t)n);
+        if (cmp < 0)
+        {    /* v > u, as above */
+            mpSetEqual(r, u, udigits);
+            return 0;
+        }
+        else if (cmp == 0)
+        {    /* v == u, so set q = 1 and r = 0 */
+            mpSetDigit(q, 1, udigits);
+            return 0;
+        }
+    }
+
+    /*    In Knuth notation, we have:
+        Given
+        u = (Um+n-1 ... U1U0)
+        v = (Vn-1 ... V1V0)
+        Compute
+        q = u/v = (QmQm-1 ... Q0)
+        r = u mod v = (Rn-1 ... R1R0)
+    */
+
+    /*    Step D1. Normalise */
+    /*    Requires high bit of Vn-1
+        to be set, so find most signif. bit then shift left,
+        i.e. d = 2^shift, u' = u * d, v' = v * d.
+    */
+    bitmask = HIBITMASK;
+    for (shift = 0; shift < BITS_PER_DIGIT; shift++)
+    {
+        if (v[n-1] & bitmask)
+            break;
+        bitmask >>= 1;
+    }
+
+    /* Normalise v in situ - NB only shift non-zero digits */
+    overflow = mpShiftLeft(v, v, shift, n);
+
+    /* Copy normalised dividend u*d into r */
+    overflow = mpShiftLeft(r, u, shift, n + m);
+    uu = r;    /* Use ptr to keep notation constant */
+
+    t[0] = overflow;    /* Extra digit Um+n */
+
+    /* Step D2. Initialise j. Set j = m */
+    for (j = m; j >= 0; j--)
+    {
+        /* Step D3. Set Qhat = [(b.Uj+n + Uj+n-1)/Vn-1]
+           and Rhat = remainder */
+        qhatOK = 0;
+        t[1] = t[0];    /* This is Uj+n */
+        t[0] = uu[j+n-1];
+        overflow = spDivide(&qhat, &rhat, t, v[n-1]);
+
+        /* Test Qhat */
+        if (overflow)
+        {    /* Qhat == b so set Qhat = b - 1 */
+            qhat = MAX_DIGIT;
+            rhat = uu[j+n-1];
+            rhat += v[n-1];
+            if (rhat < v[n-1])    /* Rhat >= b, so no re-test */
+                qhatOK = 1;
+        }
+        /* [VERSION 2: Added extra test "qhat && "] */
+        /* n >= 2 is guaranteed here (n == 0 and n == 1 returned above),
+           so v[n-2] and uu[j+n-2] are valid indices */
+        if (qhat && !qhatOK && QhatTooBig(qhat, rhat, v[n-2], uu[j+n-2]))
+        {    /* If Qhat.Vn-2 > b.Rhat + Uj+n-2
+               decrease Qhat by one, increase Rhat by Vn-1
+            */
+            qhat--;
+            rhat += v[n-1];
+            /* Repeat this test if Rhat < b */
+            if (!(rhat < v[n-1]))
+                if (QhatTooBig(qhat, rhat, v[n-2], uu[j+n-2]))
+                    qhat--;
+        }
+
+
+        /* Step D4. Multiply and subtract */
+        ww = &uu[j];
+        overflow = mpMultSub(t[1], ww, v, qhat, (size_t)n);
+
+        /* Step D5. Test remainder. Set Qj = Qhat */
+        q[j] = qhat;
+        if (overflow)
+        {    /* Step D6. Add back if D4 was negative */
+            q[j]--;
+            overflow = mpAdd(ww, ww, v, (size_t)n);
+        }
+
+        t[0] = uu[j+n-1];    /* Uj+n on next round */
+
+    }    /* Step D7. Loop on j */
+
+    /* Clear high digits in uu */
+    for (j = n; j < m+n; j++)
+        uu[j] = 0;
+
+    /* Step D8. Unnormalise. */
+
+    /* Undo the normalising shift on both the remainder and the divisor */
+    mpShiftRight(r, r, shift, n);
+    mpShiftRight(v, v, shift, n);
+
+    return 0;
+}
+
+/* Sets the multiprecision integer a (ndigits digits) to the single digit d:
+   every digit above the lowest is cleared and a[0] receives d. */
+void mpSetDigit(DIGIT_T a[], DIGIT_T d, size_t ndigits)
+{
+    size_t idx = ndigits;
+
+    /* Clear digits ndigits-1 down to 1 */
+    while (idx > 1)
+        a[--idx] = 0;
+
+    a[0] = d;
+}
+
+/*
+ *  Divides the multiprecision integer u (ndigits digits) by the single
+ *  digit v, stores the quotient in q and returns the remainder.
+ *
+ *  Returns 0 without touching q when ndigits == 0 or v == 0, so a
+ *  divide-by-zero cannot be told apart from a zero remainder by the
+ *  return value alone.
+ */
+DIGIT_T mpShortDiv(DIGIT_T q[], const DIGIT_T u[], DIGIT_T v,
+                   size_t ndigits)
+{
+    /*    Calculates quotient q = u div v
+        Returns remainder r = u mod v
+        where q, u are multiprecision integers of ndigits each
+        and r, v are single precision digits.
+
+        Makes no assumptions about normalisation.
+        
+        Ref: Knuth Vol 2 Ch 4.3.1 Exercise 16 p625
+    */
+    size_t j;
+    DIGIT_T t[2], r;
+    size_t shift;
+    DIGIT_T bitmask, overflow, *uu;
+
+    if (ndigits == 0) return 0;
+    if (v == 0)    return 0;    /* Divide by zero error */
+
+    /*    Normalise first */
+    /*    Requires high bit of V
+        to be set, so find most signif. bit then shift left,
+        i.e. d = 2^shift, u' = u * d, v' = v * d.
+    */
+    bitmask = HIBITMASK;
+    for (shift = 0; shift < BITS_PER_DIGIT; shift++)
+    {
+        if (v & bitmask)
+            break;
+        bitmask >>= 1;
+    }
+
+    v <<= shift;
+    /* The shifted dividend is built directly in q, which is then
+       overwritten digit by digit with the quotient */
+    overflow = mpShiftLeft(q, u, shift, ndigits);
+    uu = q;
+    
+    /* Step S1 - modified for extra digit. */
+    r = overflow;    /* New digit Un */
+    j = ndigits;
+    while (j--)
+    {
+        /* Step S2. */
+        /* Divide the two-digit value (r, uu[j]) by v; the value returned
+           by spDivide is stored in overflow but not examined */
+        t[1] = r;
+        t[0] = uu[j];
+        overflow = spDivide(&q[j], &r, t, v);
+    }
+
+    /* Unnormalise */
+    r >>= shift;
+    
+    return r;
+}
+
+/*
+ *  Returns 1 if Qhat is too big, i.e. if (Qhat * Vn-2) > (b.Rhat + Uj+n-2),
+ *  and 0 otherwise. The product is compared as a two-digit value:
+ *  high digit against rhat first, low digit against ujn2 only on a tie.
+ */
+static int QhatTooBig(DIGIT_T qhat, DIGIT_T rhat,
+                      DIGIT_T vn2, DIGIT_T ujn2)
+{
+    DIGIT_T prod[2];
+
+    spMultiply(prod, qhat, vn2);
+
+    /* The high digits decide unless they are equal */
+    if (prod[1] != rhat)
+        return (prod[1] > rhat) ? 1 : 0;
+
+    return (prod[0] > ujn2) ? 1 : 0;
+}
+
+/*
+ *  Subtracts q * v from the (n+1)-digit value whose low n digits are
+ *  w[0..n-1] and whose top digit is passed by value as wn.
+ *  w is updated in place; the updated top digit is returned.
+ *  When q == 0 nothing is modified and wn is returned unchanged.
+ */
+static DIGIT_T mpMultSub(DIGIT_T wn, DIGIT_T w[], const DIGIT_T v[],
+                       DIGIT_T q, size_t n)
+{    /*    Compute w = w - qv
+        where w = (WnW[n-1]...W[0])
+        return modified Wn.
+    */
+    DIGIT_T k, t[2];
+    size_t i;
+
+    if (q == 0)    /* No change */
+        return wn;
+
+    /* k carries the amount still to be subtracted from the next digit */
+    k = 0;
+
+    for (i = 0; i < n; i++)
+    {
+        spMultiply(t, q, v[i]);
+        /* Subtract the pending amount; a wrapped result means a borrow */
+        w[i] -= k;
+        if (w[i] > MAX_DIGIT - k)
+            k = 1;
+        else
+            k = 0;
+        /* Subtract the low digit of q*v[i], again detecting a borrow */
+        w[i] -= t[0];
+        if (w[i] > MAX_DIGIT - t[0])
+            k++;
+        /* The high digit of q*v[i] is subtracted in the next position */
+        k += t[1];
+    }
+
+    /* Cope with Wn not stored in array w[0..n-1] */
+    wn -= k;
+
+    return wn;
+}
+
+/*
+ *  Computes a = b << shift, where a and b have ndigits digits each.
+ *
+ *  For shift < BITS_PER_DIGIT the return value holds the bits shifted out
+ *  of the most significant digit (0 when shift is 0).
+ *  Shifts of BITS_PER_DIGIT or more first move whole digits and then
+ *  shift the remaining bits inside the digits.
+ *
+ *  NOTE(review): in the whole-digit branch the carry is read from
+ *  b[ndigits-nw] after a[] has been written; if a and b are the same
+ *  array, or if nw > ndigits, that read does not see an original digit
+ *  of b. Left unchanged here; confirm the intended contract.
+ */
+DIGIT_T mpShiftLeft(DIGIT_T a[], const DIGIT_T *b,
+    size_t shift, size_t ndigits)
+{    /* Computes a = b << shift */
+    /* [v2.1] Modified to cope with shift > BITS_PERDIGIT */
+    size_t i, y, nw, bits;
+    DIGIT_T mask, carry, nextcarry;
+
+    /* Do we shift whole digits? */
+    if (shift >= BITS_PER_DIGIT)
+    {
+        nw = shift / BITS_PER_DIGIT;
+        i = ndigits;
+        while (i--)
+        {
+            if (i >= nw)
+                a[i] = b[i-nw];
+            else
+                a[i] = 0;
+        }
+        /* Call again to shift bits inside digits */
+        bits = shift % BITS_PER_DIGIT;
+        carry = b[ndigits-nw] << bits;
+        if (bits)
+            carry |= mpShiftLeft(a, a, bits, ndigits);
+        return carry;
+    }
+    else
+    {
+        bits = shift;
+    }
+
+    /* A zero-bit shift is a plain copy. Handling it here avoids shifting
+       a digit by BITS_PER_DIGIT below, which is undefined behaviour in C
+       when that equals the width of DIGIT_T. */
+    if (bits == 0)
+    {
+        for (i = 0; i < ndigits; i++)
+            a[i] = b[i];
+        return 0;
+    }
+
+    /* Construct mask = high bits set */
+    mask = ~(~(DIGIT_T)0 >> bits);
+    
+    y = BITS_PER_DIGIT - bits;
+    carry = 0;
+    for (i = 0; i < ndigits; i++)
+    {
+        /* The high bits of this digit become the low bits of the next */
+        nextcarry = (b[i] & mask) >> y;
+        a[i] = b[i] << bits | carry;
+        carry = nextcarry;
+    }
+
+    return carry;
+}
+
+/*
+ *  Computes a = b >> shift, where a and b have ndigits digits each.
+ *
+ *  For shift < BITS_PER_DIGIT the return value holds the bits shifted out
+ *  of the least significant digit, placed in the high bits of the
+ *  returned digit (0 when shift is 0).
+ *  Shifts of BITS_PER_DIGIT or more first move whole digits and then
+ *  shift the remaining bits inside the digits.
+ *
+ *  NOTE(review): in the whole-digit branch the carry is read from
+ *  b[nw-1] after a[] has been written; if a and b are the same array,
+ *  or if nw > ndigits, that read does not see an original digit of b.
+ *  Left unchanged here; confirm the intended contract.
+ */
+DIGIT_T mpShiftRight(DIGIT_T a[], const DIGIT_T b[], size_t shift, size_t ndigits)
+{    /* Computes a = b >> shift */
+    /* [v2.1] Modified to cope with shift > BITS_PERDIGIT */
+    size_t i, y, nw, bits;
+    DIGIT_T mask, carry, nextcarry;
+
+    /* Do we shift whole digits? */
+    if (shift >= BITS_PER_DIGIT)
+    {
+        nw = shift / BITS_PER_DIGIT;
+        for (i = 0; i < ndigits; i++)
+        {
+            if ((i+nw) < ndigits)
+                a[i] = b[i+nw];
+            else
+                a[i] = 0;
+        }
+        /* Call again to shift bits inside digits */
+        bits = shift % BITS_PER_DIGIT;
+        carry = b[nw-1] >> bits;
+        if (bits)
+            carry |= mpShiftRight(a, a, bits, ndigits);
+        return carry;
+    }
+    else
+    {
+        bits = shift;
+    }
+
+    /* A zero-bit shift is a plain copy. Handling it here avoids shifting
+       a digit by BITS_PER_DIGIT below, which is undefined behaviour in C
+       when that equals the width of DIGIT_T. */
+    if (bits == 0)
+    {
+        i = ndigits;
+        while (i--)
+            a[i] = b[i];
+        return 0;
+    }
+
+    /* Construct mask to set low bits */
+    /* (thanks to Jesse Chisholm for suggesting this improved technique) */
+    mask = ~(~(DIGIT_T)0 << bits);
+    
+    y = BITS_PER_DIGIT - bits;
+    carry = 0;
+    i = ndigits;
+    while (i--)
+    {
+        /* The low bits of this digit become the high bits of the next lower one */
+        nextcarry = (b[i] & mask) << y;
+        a[i] = b[i] >> bits | carry;
+        carry = nextcarry;
+    }
+
+    return carry;
+}
+
+
+/* Computes the 64-bit product of x and y and stores it as two 32-bit
+   halves: p[0] receives the low half, p[1] the high half. Always returns 0. */
+int spMultiply(uint32_t p[2], uint32_t x, uint32_t y)
+{
+    const uint64_t wide = (uint64_t)x * y;
+
+    p[0] = (uint32_t)wide;           /* low 32 bits */
+    p[1] = (uint32_t)(wide >> 32);   /* high 32 bits */
+
+    return 0;
+}
+
+/* Divides the 64-bit value (u[1] high half, u[0] low half) by v.
+   *pq receives the low 32 bits of the quotient and *pr the remainder;
+   the return value is the high 32 bits of the quotient.
+   v must be non-zero. */
+uint32_t spDivide(uint32_t *pq, uint32_t *pr, const uint32_t u[2], uint32_t v)
+{
+    const uint64_t dividend = ((uint64_t)u[1] << 32) | (uint64_t)u[0];
+    const uint64_t quotient = dividend / (uint64_t)v;
+
+    /* remainder obtained as dividend - quotient * divisor */
+    *pr = (uint32_t)(dividend - quotient * v);
+    *pq = (uint32_t)(quotient & 0xFFFFFFFF);
+
+    return (uint32_t)(quotient >> 32);
+}
+
+/* Compares the multiprecision integers a and b of ndigits digits each.
+   Returns 1 if a > b, -1 if a < b and 0 if they are equal
+   (also 0 when ndigits is 0). */
+int mpCompare(const DIGIT_T a[], const DIGIT_T b[], size_t ndigits)
+{
+    size_t pos;
+
+    /* Walk from the most significant digit down; the first difference decides */
+    for (pos = ndigits; pos > 0; pos--)
+    {
+        DIGIT_T lhs = a[pos - 1];
+        DIGIT_T rhs = b[pos - 1];
+
+        if (lhs != rhs)
+            return (lhs > rhs) ? 1 : -1;
+    }
+
+    return 0;    /* EQ */
+}
+
+/* Sets a = b by copying ndigits digits, lowest digit first. */
+void mpSetEqual(DIGIT_T a[], const DIGIT_T b[], size_t ndigits)
+{
+    DIGIT_T *dst = a;
+    const DIGIT_T *src = b;
+    size_t remaining = ndigits;
+
+    while (remaining-- > 0)
+        *dst++ = *src++;
+}
+
+/*
+ *  Sets all ndigits digits of a to zero and returns the value read back
+ *  from a[0].
+ *  The array is accessed through volatile-qualified pointers and the
+ *  first digit is read back afterwards so that the stores are not
+ *  optimised away.
+ *  NOTE(review): a[0] is read even when ndigits is 0.
+ */
+volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits)
+{    /* Sets a = 0 */
+
+    /* Prevent optimiser ignoring this */
+    volatile DIGIT_T optdummy;
+    volatile DIGIT_T *p = a;
+
+    /* Clear from the highest digit down to digit 0 */
+    while (ndigits--)
+        a[ndigits] = 0;
+    
+    optdummy = *p;
+    return optdummy;
+}
+
+/* Returns the number of significant digits in a, i.e. the index of the
+   highest non-zero digit plus one, or 0 if all ndigits digits are zero. */
+size_t mpSizeof(const DIGIT_T a[], size_t ndigits)
+{
+    size_t len = ndigits;
+
+    /* Drop zero digits from the most significant end */
+    while (len > 0 && a[len - 1] == 0)
+        len--;
+
+    return len;
+}
+
+// MARK: HELPERS
+
+
+size_t mpConvToOctets(const DIGIT_T a[], size_t ndigits, unsigned char *c, size_t nbytes)
+/* Convert big digit a into string of octets, in big-endian order,
+   padding on the left to nbytes or truncating if necessary.
+   Return number of octets required excluding leading zero bytes.
+
+   a, ndigits : the value to convert and its length in digits
+   c, nbytes  : output buffer and its length in bytes; exactly nbytes
+                bytes are written
+   The return value is computed from the bit length of a and does not
+   depend on nbytes, so it may be larger than nbytes if the output was
+   truncated.
+*/
+{
+    int j, k, len;
+    DIGIT_T t;
+    size_t i, noctets, nbits;
+
+    /* Octets needed for the significant bits of a, rounded up */
+    nbits = mpBitLength(a, ndigits);
+    noctets = (nbits + 7) / 8;
+
+    len = (int)nbytes;
+
+    /* Fill c from its last byte backwards, taking the least significant
+       digit of a first and 8 bits of it at a time */
+    for (i = 0, j = len - 1; i < ndigits && j >= 0; i++)
+    {
+        t = a[i];
+        for (k = 0; j >= 0 && k < BITS_PER_DIGIT; j--, k += 8)
+            c[j] = (unsigned char)(t >> k);
+    }
+
+    /* Pad whatever is left at the front with zero bytes */
+    for ( ; j >= 0; j--)
+        c[j] = 0;
+
+    return (size_t)noctets;
+}
+
+
+
+size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes)
+/* Converts the nbytes big-endian octets in c into the big digit a of at
+   most ndigits digits. a is cleared first; octets that do not fit are ignored.
+   Returns actual number of digits set (may be larger than mpSizeof)
+*/
+{
+    size_t used = 0;              /* digits of a written so far */
+    int pos = (int)nbytes - 1;    /* next octet to consume, starting at the last */
+
+    mpSetZero(a, ndigits);
+
+    /* Consume octets least significant first, packing them into digits */
+    while (used < ndigits && pos >= 0)
+    {
+        DIGIT_T word = 0;
+        int sh = 0;               /* bit position of the next octet in word */
+
+        while (pos >= 0 && sh < BITS_PER_DIGIT)
+        {
+            word |= ((DIGIT_T)c[pos]) << sh;
+            pos--;
+            sh += 8;
+        }
+
+        a[used++] = word;
+    }
+
+    return used;
+}
+
+
+size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s)
+/* Convert a string in hexadecimal format to a big digit.
+   Return actual number of digits set (may be larger than mpSizeof).
+   Just ignores invalid characters in s.
+
+   a is cleared first; an empty string returns 0 with a left at zero.
+   The conversion goes through a fixed-size byte buffer on the stack.
+   NOTE(review): no check of the string length against the size of that
+   buffer is visible in this function; ALLOC_BYTES is defined elsewhere
+   and may perform one -- confirm.
+*/
+{
+//#ifdef NO_ALLOCS
+    uint8_t newdigits[MAX_ALLOC_SIZE*2];    // [v2.6] increased
+/*#else
+    uint8_t *newdigits;
+#endif*/
+    size_t newlen;
+    size_t n;
+    unsigned long t;
+    size_t i, j;
+
+    mpSetZero(a, ndigits);
+
+    /* Create some temp storage for int values */
+    /* Sized from the full string length, including any characters that
+       are skipped below */
+    n = strlen(s);
+    if (0 == n) return 0;
+    newlen = uiceil(n * 0.5);    /* log(16)/log(256)=0.5 */
+    ALLOC_BYTES(newdigits, newlen);
+
+    /* Work through zero-terminated string */
+    for (i = 0; s[i]; i++)
+    {
+        t = s[i];
+        if ((t >= '0') && (t <= '9')) t = (t - '0');
+        else if ((t >= 'a') && (t <= 'f')) t = (t - 'a' + 10);
+        else if ((t >= 'A') && (t <= 'F')) t = (t - 'A' + 10);
+        else continue;
+        /* Multiply the byte string by 16 and add the new hex digit,
+           working from the last byte to the first with t as the carry */
+        for (j = newlen; j > 0; j--)
+        {
+            t += (unsigned long)newdigits[j-1] << 4;
+            newdigits[j-1] = (unsigned char)(t & 0xFF);
+            t >>= 8;
+        }
+    }
+
+    /* Convert bytes to big digits */
+    n = mpConvFromOctets(a, ndigits, newdigits, newlen);
+
+    /* Clean up */
+    FREE_BYTES(newdigits, newlen);
+
+    return n;
+}
+
+static size_t uiceil(double x)
+/* Returns x rounded up to the next integer as a size_t; negative input yields 0 */
+{
+    size_t whole;
+
+    if (x < 0)
+        return 0;
+
+    /* Truncate, then bump by one if a fractional part was dropped */
+    whole = (size_t)x;
+
+    return ((x - whole) > 0.0) ? whole + 1 : whole;
+}
+
+/*
+ *  Overwrites the n bytes starting at v with zero and returns the value
+ *  read back from the first byte.
+ *  The bytes are written through a volatile-qualified pointer and the
+ *  first one is read back afterwards so that the stores are not
+ *  optimised away.
+ *  NOTE(review): the first byte is read even when n is 0.
+ */
+volatile uint8_t zeroise_bytes(volatile void *v, size_t n)
+{    /* Zeroise byte array b and make sure optimiser does not ignore this */
+    volatile uint8_t optdummy;
+    volatile uint8_t *b = (uint8_t*)v;
+    /* Clear from the last byte down to byte 0 */
+    while(n--)
+        b[n] = 0;
+    optdummy = *b;
+    return optdummy;
+}
+
+/* Reports a failure: passes msg to perror and then prints a fixed notice
+   on standard output. The function returns normally afterwards. */
+void mpFail(char *msg)
+{
+    perror(msg);
+    fputs("the program should stop here", stdout);
+}
+
+size_t mpBitLength(const DIGIT_T d[], size_t ndigits)
+/* Returns the number of significant bits in d; 0 if d is NULL,
+   ndigits is 0 or every digit is zero */
+{
+    size_t used, lead;
+    DIGIT_T top, probe;
+
+    if (!d || ndigits == 0)
+        return 0;
+
+    used = mpSizeof(d, ndigits);
+    if (used == 0)
+        return 0;
+
+    /* Count the zero bits above the highest set bit of the top digit */
+    top = d[used - 1];
+    lead = 0;
+    probe = HIBITMASK;
+    while (probe > 0 && !(top & probe))
+    {
+        probe >>= 1;
+        lead++;
+    }
+
+    return used * BITS_PER_DIGIT - lead;
+}
+
+/*
+ *  Prints the multiprecision integer a (len digits) in hexadecimal on
+ *  standard output, most significant digit first.
+ *  prefix and suffix are printed before and after the number when they
+ *  are not NULL. A value of zero is printed as a single digit.
+ *  NOTE(review): when len is 0 the function still reads and prints a[0].
+ */
+void mpPrintHex(const char *prefix, const DIGIT_T *a, size_t len, const char *suffix)
+{
+    if (prefix) printf("%s", prefix);
+    /* Trim leading digits which are zero */
+    while (len--)
+    {
+        if (a[len] != 0)
+            break;
+    }
+    /* If every digit was zero, len has wrapped around and this increment
+       brings it back to 0; otherwise len becomes the significant length */
+    len++;
+    if (0 == len) len = 1;
+    /* print first digit without leading zeros */
+    printf("%" PRIxBIGD, a[--len]);
+    /* remaining digits are zero-padded to a width of 8 */
+    while (len--)
+    {
+        printf("%08" PRIxBIGD, a[len]);
+    }
+    if (suffix) printf("%s", suffix);
+}
+
+
+/*
+ *  Modular exponentiation: yout = x^e mod m.
+ *
+ *  yout, x, e and m are multiprecision integers of ndigits digits each.
+ *  ndigits must be non-zero and at most MAX_FIXED_DIGITS (both asserted),
+ *  because the temporaries are fixed-size stack arrays.
+ *  Always returns 0.
+ *
+ *  mpNEXTBITMASK, mpMODSQUARETEMP, mpMODMULTTEMP and mpDESTROY are macros
+ *  defined elsewhere; the comments below describe them only as far as
+ *  their use here shows.
+ */
+int mpModExpO(DIGIT_T yout[], const DIGIT_T x[], const DIGIT_T e[], DIGIT_T m[], size_t ndigits)
+{    /*    Computes y = x^e mod m */
+    /*    "Classic" binary left-to-right method */
+    /*  [v2.2] removed const restriction on m[] to avoid using an extra alloc'd var
+        (m is changed in-situ during the divide operation then restored) */
+    DIGIT_T mask;
+    size_t n;
+    size_t nn = ndigits * 2;
+    /* Create some double-length temps */
+//#ifdef NO_ALLOCS
+    DIGIT_T t1[MAX_FIXED_DIGITS * 2];
+    DIGIT_T t2[MAX_FIXED_DIGITS * 2];
+    DIGIT_T y[MAX_FIXED_DIGITS * 2];
+    assert(ndigits <= MAX_FIXED_DIGITS);
+/*#else
+    DIGIT_T *t1, *t2, *y;
+    t1 = mpAlloc(nn);
+    t2 = mpAlloc(nn);
+    y  = mpAlloc(nn);
+#endif
+    */
+    assert(ndigits != 0);
+
+    /* n = number of significant digits in the exponent */
+    n = mpSizeof(e, ndigits);
+    /* Catch e==0 => x^0=1 */
+    if (0 == n)
+    {
+        mpSetDigit(yout, 1, ndigits);
+        /* t1, t2 and y have not been written on this path; they are
+           still passed to mpDESTROY at the label below */
+        goto done;
+    }
+    /* Find second-most significant bit in e */
+    for (mask = HIBITMASK; mask > 0; mask >>= 1)
+    {
+        if (e[n-1] & mask)
+            break;
+    }
+    /* presumably steps mask (and n, when a digit is exhausted) to the
+       next lower bit of e -- confirm against the macro definition */
+    mpNEXTBITMASK(mask, n);
+
+    /* Set y = x */
+    mpSetEqual(y, x, ndigits);
+
+    /* For bit j = k-2 downto 0 */
+    while (n)
+    {
+        /* Square y = y * y mod n */
+        mpMODSQUARETEMP(y, m, ndigits, t1, t2);
+        if (e[n-1] & mask)
+        {    /*    if e(j) == 1 then multiply
+                y = y * x mod n */
+            mpMODMULTTEMP(y, x, m, ndigits, t1, t2);
+        }
+        
+        /* Move to next bit */
+        mpNEXTBITMASK(mask, n);
+    }
+
+    /* Return y */
+    mpSetEqual(yout, y, ndigits);
+
+done:
+    mpDESTROY(t1, nn);
+    mpDESTROY(t2, nn);
+    mpDESTROY(y, ndigits);
+
+    return 0;
+}
+
+/* Multi-precision squaring: w = x * x.
+   x has ndigits digits and w receives 2*ndigits digits (all of which are
+   cleared first). w must not be the same array as x (asserted).
+   Always returns 0. */
+int mpSquare(DIGIT_T w[], const DIGIT_T x[], size_t ndigits)
+/* New in Version 2.0 */
+{
+    /*    Computes square w = x * x
+        where x is a multiprecision integer of ndigits
+        and w is a multiprecision integer of 2*ndigits
+
+        Ref: Menezes p596 Algorithm 14.16 with errata.
+    */
+
+    /* p: two-digit product (p[0] low, p[1] high)
+       u: two-digit running carry "c" (u[0] low, u[1] overflow digit)
+       k: overflow out of p[1] for the current step
+       carry/cpos: overflow left over from the previous row and the index
+                   in w where it has to be added back in */
+    DIGIT_T k, p[2], u[2], cbit, carry;
+    size_t i, j, t, i2, cpos;
+
+    assert(w != x);
+
+    t = ndigits;
+
+    /* 1. For i from 0 to (2t-1) do: w_i = 0 */
+    i2 = t << 1;
+    for (i = 0; i < i2; i++)
+        w[i] = 0;
+
+    carry = 0;
+    cpos = i2-1;
+    /* 2. For i from 0 to (t-1) do: */
+    for (i = 0; i < t; i++)
+    {
+        /* 2.1 (uv) = w_2i + x_i * x_i, w_2i = v, c = u
+           Careful, w_2i may be double-prec
+        */
+        i2 = i << 1; /* 2*i */
+        spMultiply(p, x[i], x[i]);
+        /* add w_2i into the low digit; a wrap-around means carry into p[1] */
+        p[0] += w[i2];
+        if (p[0] < w[i2])
+            p[1]++;
+        k = 0;    /* p[1] < b, so no overflow here */
+        if (i2 == cpos && carry)
+        {
+            /* fold in the overflow remembered from the previous row */
+            p[1] += carry;
+            if (p[1] < carry)
+                k++;
+            carry = 0;
+        }
+        w[i2] = p[0];
+        u[0] = p[1];
+        u[1] = k;
+
+        /* 2.2 for j from (i+1) to (t-1) do:
+           (uv) = w_{i+j} + 2x_j * x_i + c,
+           w_{i+j} = v, c = u,
+           u is double-prec
+           w_{i+j} is dbl if [i+j] == cpos
+        */
+        k = 0;
+        for (j = i+1; j < t; j++)
+        {
+            /* p = x_j * x_i */
+            spMultiply(p, x[j], x[i]);
+            /* p = 2p <=> p <<= 1 */
+            /* cbit: bit shifted from p[0] into p[1]; k: bit shifted out of p[1] */
+            cbit = (p[0] & HIBITMASK) != 0;
+            k =  (p[1] & HIBITMASK) != 0;
+            p[0] <<= 1;
+            p[1] <<= 1;
+            p[1] |= cbit;
+            /* p = p + c */
+            p[0] += u[0];
+            if (p[0] < u[0])
+            {
+                p[1]++;
+                if (p[1] == 0)
+                    k++;
+            }
+            p[1] += u[1];
+            if (p[1] < u[1])
+                k++;
+            /* p = p + w_{i+j} */
+            p[0] += w[i+j];
+            if (p[0] < w[i+j])
+            {
+                p[1]++;
+                if (p[1] == 0)
+                    k++;
+            }
+            if ((i+j) == cpos && carry)
+            {    /* catch overflow from last round */
+                p[1] += carry;
+                if (p[1] < carry)
+                    k++;
+                carry = 0;
+            }
+            /* w_{i+j} = v, c = u */
+            w[i+j] = p[0];
+            u[0] = p[1];
+            u[1] = k;
+        }
+        /* 2.3 w_{i+t} = u */
+        w[i+t] = u[0];
+        /* remember overflow in w_{i+t} */
+        carry = u[1];
+        cpos = i+t;
+    }
+
+    /* (NB original step 3 deleted in Menezes errata) */
+
+    /* Return w */
+
+    return 0;
+}
+
+
+/* Write the multi-digit number a (ndigits long) into s, a buffer of
+   capacity smax, as a hexadecimal string. This is conv_to_base() with the
+   base fixed at 16; the return value is passed through unchanged
+   (number of chars set excluding leading zeroes). */
+size_t mpConvToHex(const DIGIT_T a[], size_t ndigits, char *s, size_t smax)
+{
+    const int hex_base = 16;
+
+    return conv_to_base(a, ndigits, s, smax, hex_base);
+}
+
+
+/* Convert the multi-digit number a (ndigits long) into a null-terminated
+   string of digits in the given base, written to s of capacity smax.
+   Only base 10 and base 16 are supported; any other base asserts and
+   returns 0.
+   Returns the number of significant characters (excluding leading zeroes
+   and the terminator), or 1 for a zero value. smax can be 0 (or s NULL) to
+   find out the required length without writing anything. If the digits do
+   not fit, only the first smax-1 of them are stored.
+   Returns 0 with an empty string if a non-zero a is longer than
+   MAX_FIXED_DIGITS (reachable only when assertions are compiled out). */
+static size_t conv_to_base(const DIGIT_T a[], size_t ndigits, char *s, size_t smax, int base)
+{
+
+    uint8_t bytes[MAX_ALLOC_SIZE], newdigits[MAX_ALLOC_SIZE*3]; // [v2.6] increased
+
+    const char DEC_DIGITS[] = "0123456789";
+    const char HEX_DIGITS[] = "0123456789abcdef";
+    size_t newlen, nbytes, nchars;
+    size_t n;
+    unsigned long t;
+    size_t i, j, isig;
+    const char *digits;
+    double factor;
+
+    switch (base)
+    {
+    case 10:
+        digits = DEC_DIGITS;
+        factor = 2.40824;    /* log(256)/log(10)=2.40824 */
+        break;
+    case 16:
+        digits = HEX_DIGITS;
+        factor = 2.0;    /* log(256)/log(16)=2.0 */
+        break;
+    default:
+        assert (10 == base || 16 == base);
+        return 0;
+    }
+
+    /* Pre-fill the output with '0' characters and terminate it */
+    if (smax > 0 && s)
+    {
+        memset(s, '0', smax-1);
+        s[smax-1] = '\0';
+    }
+
+    /* Catch zero input value (return 1 not zero) */
+    if (mpIsZero(a, ndigits))
+    {
+        /* Cut the pre-filled string down to a single '0'. With smax == 1
+           the buffer only holds the terminator, so s[1] is out of bounds */
+        if (smax > 1 && s)
+            s[1] = '\0';
+        return 1;
+    }
+
+    /* The work buffers are fixed-size; keep the bounds check alive when
+       NDEBUG strips the assert inside ALLOC_BYTES */
+    assert(ndigits <= MAX_FIXED_DIGITS);
+    if (ndigits > MAX_FIXED_DIGITS)
+    {
+        if (smax > 0 && s)
+            s[0] = '\0';
+        return 0;
+    }
+
+    /* First, we convert to 8-bit octets (bytes), which are easier to handle */
+    nbytes = ndigits * BITS_PER_DIGIT / 8;
+    ALLOC_BYTES(bytes, nbytes);
+
+    n = mpConvToOctets(a, ndigits, bytes, nbytes);
+
+    /* Create some temp storage for int values */
+    newlen = uiceil(n * factor);
+    ALLOC_BYTES(newdigits, newlen);
+
+    /* Feed the octets in one at a time: multiply the base-`base` number in
+       newdigits by 256 and add the next octet, carrying right-to-left */
+    for (i = 0; i < nbytes; i++)
+    {
+        t = bytes[i];
+        for (j = newlen; j > 0; j--)
+        {
+            t += (unsigned long)newdigits[j-1] * 256;
+            newdigits[j-1] = (unsigned char)(t % base);
+            t /= base;
+        }
+    }
+
+    /* Find index of leading significant digit */
+    for (isig = 0; isig < newlen; isig++)
+        if (newdigits[isig])
+            break;
+
+    nchars = newlen - isig;
+
+    /* Convert to a null-terminated string of digit chars */
+    /* up to limit, unless user has specified null or size == 0 */
+    if (smax > 0 && s)
+    {
+        for (i = 0; i < nchars && i < smax-1; i++)
+        {
+            s[i] = digits[newdigits[isig+i]];
+        }
+        s[i] = '\0';
+    }
+
+    /* Wipe the work buffers */
+    FREE_BYTES(bytes, nbytes);
+    FREE_BYTES(newdigits, newlen);
+
+    return nchars;
+}
+
+/* Returns 1 (true) if every one of the ndigits digits of a is zero,
+   otherwise 0. With ndigits == 0 the result is 1. */
+int mpIsZero(const DIGIT_T a[], size_t ndigits)
+{
+    size_t idx = 0;
+
+    /* Scan upward from the lsb end until a non-zero digit turns up */
+    while (idx < ndigits && a[idx] == 0)
+        idx++;
+
+    return idx == ndigits;
+}
diff --git a/source/big-int-test.h b/source/big-int-test.h
@@ -0,0 +1,127 @@
+//
+//  Created by Cedric Zwahlen on 25.09.23.
+//
+
+#ifndef big_int_test_h
+#define big_int_test_h
+
+#include <stdio.h>
+
+
+
+
+#include <inttypes.h>
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <string.h> // only used for the convert from hex function
+#include <assert.h>
+
+// MARK: definitions
+
+/* One "digit" of a multi-precision number: a 32-bit unsigned word.
+   NOTE(review): the author left an open question here about whether the
+   GPU build needs a narrower digit type - unresolved. */
+typedef uint32_t DIGIT_T; // for gpu might need to be half? is that half?
+
+/* 16-bit unsigned type, half the width of DIGIT_T */
+typedef uint16_t HALF_DIGIT_T;
+
+/* Sizes to match */
+
+
+
+// MARK: MACROS
+
+/* Wipe the n digits of b; skipped when b is a null pointer */
+#define mpDESTROY(b, n) do{if(b)mpSetZero((b),(n));}while(0)
+/* Larger of a and b. NB each argument may be evaluated twice */
+#define max(a,b)            (((a) > (b)) ? (a) : (b))
+
+// only for that string conversion
+/* b must be a true array (sizeof is applied to it): assert that n bytes
+   fit into it, then clear them */
+#define ALLOC_BYTES(b,n) do{assert((n)<=sizeof((b)));zeroise_bytes((b),(n));}while(0)
+/* Clear n bytes of b when the buffer is no longer needed */
+#define FREE_BYTES(b,n) zeroise_bytes((b),(n))
+
+
+#define MAX_DIGIT 0xFFFFFFFFUL
+#define MAX_HALF_DIGIT 0xFFFFUL    /* NB 'L' */
+#define BITS_PER_DIGIT 32
+#define HIBITMASK 0x80000000UL
+
+#define BYTES_PER_DIGIT (BITS_PER_DIGIT / 8)
+
+/* Largest number the fixed-size (stack) temporaries can hold */
+#define MAX_FIXED_BIT_LENGTH 8192
+#define MAX_FIXED_DIGITS ((MAX_FIXED_BIT_LENGTH + BITS_PER_DIGIT - 1) / BITS_PER_DIGIT)
+
+#define MAX_ALLOC_SIZE (MAX_FIXED_DIGITS*BYTES_PER_DIGIT)
+
+/* printf conversion specifiers matching DIGIT_T */
+#define PRIuBIGD PRIu32
+#define PRIxBIGD PRIx32
+#define PRIXBIGD PRIX32
+
+/* MACROS TO DO MODULAR SQUARING AND MULTIPLICATION USING PRE-ALLOCATED TEMPS */
+/* Required lengths |y|=|t1|=|t2|=2*n, |m|=n; but final |y|=n */
+/* All arguments are parenthesised so that expressions such as `len+1`
+   expand correctly */
+/* Square: y = (y * y) mod m */
+#define mpMODSQUARETEMP(y,m,n,t1,t2) do{mpSquare((t1),(y),(n));mpDivide((t2),(y),(t1),(n)*2,(m),(n));}while(0)
+/* Mult:   y = (y * x) mod m */
+#define mpMODMULTTEMP(y,x,m,n,t1,t2) do{mpMultiply((t1),(x),(y),(n));mpDivide((t2),(y),(t1),(n)*2,(m),(n));}while(0)
+/* Mult:   w = (y * x) mod m */
+#define mpMODMULTXYTEMP(w,y,x,m,n,t1,t2) do{mpMultiply((t1),(x),(y),(n));mpDivide((t2),(w),(t1),(n)*2,(m),(n));}while(0)
+
+/* Step a single-bit mask one bit to the right; when it falls off the low
+   end, restart it at the top bit and decrement n (mask and n must be
+   modifiable lvalues) */
+#define mpNEXTBITMASK(mask, n) do{if((mask)==1){(mask)=HIBITMASK;(n)--;}else{(mask)>>=1;}}while(0)
+
+/* NOTE(review): the parameter names suggest r = u mod v - confirm against the definition */
+int mpModulo(DIGIT_T r[], const DIGIT_T u[], size_t udigits, DIGIT_T v[], size_t vdigits);
+
+int mpModMult(DIGIT_T a[], const DIGIT_T x[], const DIGIT_T y[], DIGIT_T m[], size_t ndigits);
+
+int mpMultiply(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits);
+DIGIT_T mpAdd(DIGIT_T w[], const DIGIT_T u[], const DIGIT_T v[], size_t ndigits);
+/* Used by the mpMOD...TEMP macros as mpDivide(quotient, remainder, product, 2*n, modulus, n) */
+int mpDivide(DIGIT_T q[], DIGIT_T r[], const DIGIT_T u[], size_t udigits, DIGIT_T v[], size_t vdigits);
+/* NOTE(review): static prototypes in a header give every including file its
+   own internal-linkage declaration; presumably only big-int-test.c defines
+   these helpers - consider moving the declarations there */
+static int QhatTooBig(DIGIT_T qhat, DIGIT_T rhat, DIGIT_T vn2, DIGIT_T ujn2);
+static DIGIT_T mpMultSub(DIGIT_T wn, DIGIT_T w[], const DIGIT_T v[], DIGIT_T q, size_t n);
+DIGIT_T mpShiftLeft(DIGIT_T a[], const DIGIT_T *b, size_t shift, size_t ndigits);
+
+
+void mpSetDigit(DIGIT_T a[], DIGIT_T d, size_t ndigits);
+
+int mpCompare(const DIGIT_T a[], const DIGIT_T b[], size_t ndigits);
+
+
+DIGIT_T mpShiftRight(DIGIT_T a[], const DIGIT_T b[], size_t shift, size_t ndigits);
+/* Single-digit helpers: p and u are two-digit values */
+int spMultiply(uint32_t p[2], uint32_t x, uint32_t y);
+uint32_t spDivide(uint32_t *pq, uint32_t *pr, const uint32_t u[2], uint32_t v);
+
+/* w = x * x; x has ndigits digits, w has 2*ndigits digits */
+int mpSquare(DIGIT_T w[], const DIGIT_T x[], size_t ndigits);
+
+/* Number of significant bits in d */
+size_t mpBitLength(const DIGIT_T d[], size_t ndigits);
+
+size_t mpConvToOctets(const DIGIT_T a[], size_t ndigits, unsigned char *c, size_t nbytes);
+
+DIGIT_T mpShortDiv(DIGIT_T q[], const DIGIT_T u[], DIGIT_T v,
+                   size_t ndigits);
+
+void mpSetEqual(DIGIT_T a[], const DIGIT_T b[], size_t ndigits);
+
+
+
+size_t mpSizeof(const DIGIT_T a[], size_t ndigits);
+
+volatile DIGIT_T mpSetZero(volatile DIGIT_T a[], size_t ndigits);
+
+
+void mpPrintDecimal(const char *prefix, const DIGIT_T *a, size_t ndigits, const char *suffix);
+
+
+size_t mpConvFromOctets(DIGIT_T a[], size_t ndigits, const unsigned char *c, size_t nbytes);
+size_t mpConvFromHex(DIGIT_T a[], size_t ndigits, const char *s);
+
+/* ceil(x) as a non-negative integer, 0 if x < 0 (see NOTE(review) on static prototypes) */
+static size_t uiceil(double x);
+/* Set n bytes at v to zero */
+volatile uint8_t zeroise_bytes(volatile void *v, size_t n);
+/* Reports msg via perror(); does not terminate the program */
+void mpFail(char *msg);
+
+/* Prints a in hexadecimal to stdout, wrapped in optional prefix and suffix */
+void mpPrintHex(const char *prefix, const DIGIT_T *a, size_t len, const char *suffix);
+
+/* yout = x^e mod m */
+int mpModExpO(DIGIT_T yout[], const DIGIT_T x[], const DIGIT_T e[], DIGIT_T m[], size_t ndigits);
+
+/* String conversion for base 10 or 16 (see NOTE(review) on static prototypes) */
+static size_t conv_to_base(const DIGIT_T a[], size_t ndigits, char *s, size_t smax, int base);
+
+/* Non-zero if all ndigits digits of a are zero */
+int mpIsZero(const DIGIT_T a[], size_t ndigits);
+
+/* Hexadecimal string conversion of a into s (capacity smax) */
+size_t mpConvToHex(const DIGIT_T a[], size_t ndigits, char *s, size_t smax);
+
+#endif /* big_int_test_h */
diff --git a/source/lib-gpu-verify.c b/source/lib-gpu-verify.c
@@ -0,0 +1,589 @@
+//
+// File:       hello.c
+//
+// Abstract:   A simple "Hello World" compute example showing basic usage of OpenCL which
+//             calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
+//             floating point values.
+//             
+//
+// Version:    <1.0>
+//
+// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
+//             in consideration of your agreement to the following terms, and your use,
+//             installation, modification or redistribution of this Apple software
+//             constitutes acceptance of these terms.  If you do not agree with these
+//             terms, please do not use, install, modify or redistribute this Apple
+//             software.
+//
+//             In consideration of your agreement to abide by the following terms, and
+//             subject to these terms, Apple grants you a personal, non - exclusive
+//             license, under Apple's copyrights in this original Apple software ( the
+//             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
+//             Software, with or without modifications, in source and / or binary forms;
+//             provided that if you redistribute the Apple Software in its entirety and
+//             without modifications, you must retain this notice and the following text
+//             and disclaimers in all such redistributions of the Apple Software. Neither
+//             the name, trademarks, service marks or logos of Apple Inc. may be used to
+//             endorse or promote products derived from the Apple Software without specific
+//             prior written permission from Apple.  Except as expressly stated in this
+//             notice, no other rights or licenses, express or implied, are granted by
+//             Apple herein, including but not limited to any patent rights that may be
+//             infringed by your derivative works or by other works in which the Apple
+//             Software may be incorporated.
+//
+//             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
+//             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
+//             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
+//             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
+//             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
+//
+//             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
+//             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+//             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+//             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
+//             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
+//             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
+//             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
+//
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+#include "big-int-test.h"
+
+#include "rsa-test.h"
+#include "opencl-test.h"
+
+
+
+//
+//
+//#include <stdio.h>
+//#include <string.h>
+//#include <time.h>
+//#include "more-tests.h"
+//
+// 
+//#define LUT_SIZE 2049
+//
+//static struct bn lut[LUT_SIZE];
+//
+///*
+// * Performs bitwise Montgomery modular multiplication ( X*Y*R^(-1) mod M)
+// *
+// * Parameters:
+// *         x,y,m - bignums
+// *         mBits - # of bits in m
+// *         out      - bignum result
+// */
+//
+//void montMult(struct bn*  x, struct bn*  y, struct bn*  m, int mBits, struct bn*  out){
+//
+//    struct bn t;
+//    bignum_init(&t);
+//
+//    int i;
+//    for(i = mBits; i > 0 ; i--){                    //efficient loop exit
+//
+//        int t0Bit = bignum_getbit(&t,0);
+//        int xiBit = bignum_getbit(x, mBits - i);    //loop exit requires subtraction here
+//        int y0Bit = bignum_getbit(y,0);
+//        int op = t0Bit + (xiBit * y0Bit);
+//
+//        if(xiBit == 1){
+//            bignum_add(&t, y, &t);
+//        }
+//
+//        if(op == 1){
+//            bignum_add(&t, m, &t);
+//        }
+//
+//        bignum_rshift(&t,&t, 1);
+//    }
+//
+//    if(bignum_cmp(&t, m) >= 0){
+//        bignum_sub(&t,m,&t);
+//    }
+//
+//    bignum_assign(out,&t);
+//}
+//
+//
+//
+///*mod exp, no LUT */
+//
+//void modExp(struct bn*  x, struct bn*   e, int eBits, struct bn*  m, int mBits, struct bn*  r2m,  struct bn*   out){
+//
+//    struct bn z,one;
+//    struct bn parr[3];
+//    struct bn zarr[3];
+//
+//    //reduce z?
+//    bignum_from_int(&z, 1);
+//    montMult(&z,r2m,m, mBits, &zarr[1]);
+//
+//    //reduce x, assign to p
+//    montMult(x,r2m,m, mBits,&parr[1]);
+//
+//    struct bn tm;
+//
+//    int i = 0;
+//    for(; i < eBits; i++){
+//
+//        bignum_assign(&tm, &parr[1]);
+//        montMult(&tm,&parr[1],m, mBits, &parr[2]);
+//
+//        if(bignum_getbit(e, i) == 1){
+//            montMult(&zarr[1],&parr[1],m,mBits,&zarr[2]);
+//        }else{
+//            bignum_assign(&zarr[2],&zarr[1]);
+//        }
+//
+//        //printf("num bits p: %d, num bits z: %d\n", bignum_numbits(&parr[1]), bignum_numbits(&zarr[1]));
+//        bignum_assign(&parr[1], &parr[2]);
+//        bignum_assign(&zarr[1], &zarr[2]);
+//    }
+//
+//    bignum_from_int(&one, 1);
+//    montMult(&zarr[1], &one, m, mBits, out);
+//}
+//
+//
+///* Mod Exp using precomputed LUT */
+//
+//void modExpLUT(struct bn*  x, struct bn*  e, int eBits, struct bn*  m, int mBits, struct bn*  r2m, struct bn*   out){
+//
+//    struct bn z,one;
+//    struct bn parr[3];
+//    struct bn zarr[3];
+//
+//    //reduce z?
+//    bignum_from_int(&z, 1);
+//    montMult(&z,r2m,m, mBits, &zarr[1]);
+//
+//    bignum_assign(&parr[1],&lut[0]);
+//
+//    int b = 1;
+//    int i = 0;
+//    for(; i < eBits; i++){
+//        bignum_assign(&parr[2],&lut[i+1]);
+//
+//        if(bignum_getbit(e, i) == 1){
+//            montMult(&zarr[1],&lut[i],m,mBits,&zarr[2]);
+//        }else{
+//            bignum_assign(&zarr[2],&zarr[1]);
+//        }
+//
+//        //printf("num bits p: %d, num bits z: %d\n", bignum_numbits(&parr[1]), bignum_numbits(&zarr[1]));
+//        bignum_assign(&parr[1], &parr[2]);
+//        bignum_assign(&zarr[1], &zarr[2]);
+//        b++;
+//    }
+//
+//    bignum_from_int(&one, 1);
+//    montMult(&zarr[1], &one, m, mBits, out);
+//}
+//
+///*
+//void genLUT(struct bn* valToDec, struct bn* m, int mBits, struct bn* r2m) {
+//
+//    struct bn two;
+//    bignum_from_int(&two, 2);
+//
+//    bignum_assign(&lut[0],valToDec);
+//
+//    int i;
+//    for (i = 1; i < (LUT_SIZE + 1); i++) {
+//        struct bn temp;
+//        bignum_assign(&temp,&lut[i-1]);
+//        struct bn tmp, tmp1, tmp2;
+//        bignum_assign(&tmp2, &two);
+//        bignum_pow(&temp, &tmp2, &tmp);
+//        bignum_mod(&tmp, m, &tmp1);
+//        bignum_assign(&lut[i],&tmp1);
+//    }
+//
+//    int a = 0;
+//    FILE *f;
+//    f = fopen("LUT.txt", "a");
+//
+//    for(; a < (mBits + 1); a++){
+//        montMult(&lut[a],r2m,m, mBits, &lut[a]);
+//        int size = 8192;
+//        char str[size];
+//        bignum_to_string(&lut[a],str, size);
+//        fprintf(f,"%s\n", str);
+//    }
+//
+//    fclose(f);
+//}
+//*/
+//
+//int parseLUT(int start, int num){
+//
+//    FILE *f;
+//    f = fopen("./LUT.txt", "r");
+//    char str[num+1];
+//    static char* zpad[8] = {"", "0", "00", "000", "0000", "00000", "000000", "0000000"};
+//    //printf("Parse LUT: %d to %d\n", start, num + start);
+//
+//    //puts("");
+//    int i = 0;
+//    int a = 0;
+//    for(; i < (num + start); i++){
+//        fscanf(f, "%s\n", str);
+//
+//        if(i >= start){
+//
+//            int len = strlen(str);
+//            char* dup;
+//            if((len & 1) == 1){
+//                sprintf(str, "%s%s", zpad[1], (dup = strdup(str)));
+//                len++;
+//            }
+//
+//            int lenMod8 = len -((len >> 3) << 3);
+//            if(lenMod8 != 0){
+//                sprintf(str, "%s%s", zpad[lenMod8], (dup = strdup(str)));
+//                len += lenMod8;
+//            }
+//            bignum_from_string(&lut[i-start],str,len);
+//        }
+//        a++;
+//    }
+//    return i;
+//}
+//
+//int main(void) {
+//
+//    /* ----------- 12-bit Test -------------- */
+//
+//    struct bn n,e,d,r2m;
+//    bignum_from_int(&n, 3233);             //modulus
+//    bignum_from_int(&e, 17);            //public
+//    bignum_from_int(&d, 2753);            //private
+//    bignum_from_int(&r2m, 1179);        //R^2m mod M
+//
+//    struct bn valToDec;                    //value to decrypt/encrypt
+//    bignum_from_int(&valToDec, 855);
+//
+//    int nBits = bignum_numbits(&n);
+//    int dBits = bignum_numbits(&d);
+//
+//    struct bn result;
+//    bignum_init(&result);
+//
+//    clock_t before = clock();
+//    modExp(&valToDec, &d, dBits, &n, nBits, &r2m, &result);
+//    clock_t after = clock();
+//
+//    double msec = (double)(after - before) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("-------Test 1--------\n");
+//    printf(" RSA Keysize: %3d [bits]\n",dBits);
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("Time(no LUT):  %.5f [sec]\n", msec);
+//
+//    bignum_init(&result);
+//
+//    //genLUT(&valToDec, &n, nBits, &r2m);
+//    int lutSeek = parseLUT(0,dBits+1);
+//    //printf("Lutseek: %d\n", lutSeek);
+//
+//    before = clock();
+//    modExpLUT(&valToDec, &d, dBits, &n, nBits, &r2m, &result);
+//    after = clock();
+//
+//    msec = (double)(after - before) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("   Time(LUT):  %.5f [sec]\n", msec);
+//
+//    /* ----------- 512-bit Modulus Test -------------- */
+//
+//
+//    struct bn n512, pub, priv, v2Dec;
+//    int e1 = 65537;
+//    bignum_from_int(&pub,e1);
+//    char str1[] = "758463d46999c11496449db8dddd1e407de2e9a8f33612f454866acddd759da8173d4e3fe8c4eaf121f86f87ac8e1d58f54e2c6a80bcf8c404884795252224ad";
+//    bignum_from_string(&n512,str1, 128);
+//    char str2[] = "68827b718d1452d4e72a5085f6b14dd516df34e3ae9fb94d96da0fa3d33e651cc244b0275a24ab0753b5c01eac2f8f0d700c587bbd6d8aeb6a4e99e1a9372655";
+//    bignum_from_string(&priv,str2, 128);
+//    char str3[] = "45462476f31c3dfde5ac5fde4862d33d917f52255d80555b543584a32b71762a1fc719a341c0e925e9fff02a657764ae78b143d324cfc8892695c55801237885";
+//    bignum_from_string(&v2Dec,str3,128);
+//    char r2ms[] = "47395beb0ae85106f9f8548040a9b165d9a37499d0d98a14a5bcd0b943d0549be18b2ced65bfc42db40331f3ec67faf9cccf19e51d3ef7a09e03ebb1855d5e5e";
+//    bignum_from_string(&r2m,r2ms,128);
+//
+//    nBits = bignum_numbits(&n512);
+//    dBits = bignum_numbits(&priv);
+//
+//    bignum_init(&result);
+//
+//    clock_t before1 = clock();
+//    modExp(&v2Dec, &priv,dBits, &n512, nBits, &r2m, &result);
+//    clock_t after1 = clock();
+//
+//    double msec1 = (float)(after1 - before1) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("-------Test 2--------\n");
+//    printf(" RSA Keysize: %4d [bits]\n",dBits);
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("Time(no LUT):  %.5f [sec]\n", msec1);
+//
+//    bignum_init(&result);
+//
+//    //genLUT(&v2Dec, &n512, nBits, &r2m);
+//    lutSeek = parseLUT(lutSeek,dBits+1);
+//
+//
+//    before1 = clock();
+//    modExpLUT(&v2Dec, &priv,dBits, &n512, nBits, &r2m, &result);
+//    after1 = clock();
+//
+//    msec1 = (float)(after1 - before1) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("   Time(LUT):  %.5f [sec]\n", msec1);
+//
+//
+//    /* ----------- 1024-bit Modulus Test -------------- */
+//
+//    struct bn n1024, pub1, priv1, v2Dec1;
+//
+//    bignum_from_int(&pub1,e1);
+//    char str4[] = "79eec1e33a41bf4592557bb1991b1830d4b445f55e3c9e683afc7a7f4abf05549a5e7ea811f8c3faf58450c2eafce1a25c5eb49821d0f930247ef2c6a6e426f01f91a6090292a433d84b93a1e6c5ba933c48f48923aa727f3de18c5fa4f1c0f7cce43cf407f94ee1d316d572b4428c7399158b76fa15f8b3dfbb36bd5f4bc5d1";
+//    bignum_from_string(&n1024,str4, 256);
+//    char str5[] = "233c05371e4c85731b382c88438ffacb918b8e73bb099554d546c43728684ea805fbac69f0d78bfa671c17225c393b1269d2cc28f20cab1568566edd4cb8bd2f59e4b25f4b3787af54e002216bc42a34a2bdbd7bfe4ddab35dde5256fc7bfbc1b39f641c86e99950768214e69b18f806b0d200908484eb7cf6e817ab57400861";
+//    bignum_from_string(&priv1,str5, 256);
+//    char str6[] = "4e29e645da6efddda068a8dcfceea970a5e86f7b518655cd3fba103d6899618a6b7caa86df16f28f7bdadbe2ad250794c9f20c9c42338624ab077f9f9ae3733a5c3bf8b4686b56cfe635be0010bf734fdc2a4f2ce5cf920fd4e79c6b7330a8fc2025e61d33dd8b3056390a2226d9d9eaec37f7aea1682f25120c260ecb165823";
+//    bignum_from_string(&v2Dec1,str6,256);
+//    char r2ms1[] = "1a32ca1d9343f9ac08567501d91b0b29540e5e6914aaf46c460b92007b6264ca7a4be15e5346933dd2865022a2535729ea817c215f80714384b8235705b88bc3a295fe00ae789bd241d5816e5d617c362a2ed1bdd8b45ca26f558a987de829afe0253c33b6a7bab59c35429c29c4ab63a0ab16c7f8c4b9319f6f1947266522a5";
+//    bignum_from_string(&r2m,r2ms1,256);
+//
+//    dBits = bignum_numbits(&priv1);
+//    nBits = bignum_numbits(&n1024);
+//
+//    bignum_init(&result);
+//
+//    clock_t before2 = clock();
+//    modExp(&v2Dec1, &priv1,dBits, &n1024, nBits, &r2m, &result);
+//    clock_t after2 = clock();
+//
+//    msec1 = (float)(after2 - before2) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("-------Test 3--------\n");
+//    printf(" RSA Keysize: %5d [bits]\n",dBits);
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("Time(no LUT):  %.5f [sec]\n", msec1);
+//    bignum_init(&result);
+//
+//    //genLUT(&v2Dec1, &n1024, nBits, &r2m);
+//    lutSeek = parseLUT(lutSeek,dBits+2);
+//    before2 = clock();
+//    modExpLUT(&v2Dec1, &priv1,dBits, &n1024, nBits,&r2m, &result);
+//    after2 = clock();
+//
+//    msec1 = (float)(after2 - before2) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("   Time(LUT):  %.5f [sec]\n", msec1);
+//
+//
+//    /* ----------- 2048-bit Modulus Test -------------- */
+//
+//    struct bn n2048, pub2, priv2, v2Dec2;
+//
+//    bignum_from_int(&pub2,e1);
+//    char str7[] = "bc07d529450214ef63a8d61966987e8ca0594d9a7ec4f1881117b4f8ecbdc74b8769f6c98bfe931c9474116be8bd36527acfd95f6633d12cc8a960ab3d3e7a0b4b3e4990b594ee61af3b56315337501225525fb997b65c38118d614601dcb8bd631673a510498f2c3dab44d723d8b6daa697d0108e7fcb4d27525f386e7fcd9ce29c4ab12c4258aa77872259a25804791a1eaef54b65226ec84765442ac839db30467d86910e700d802807de1f4fef5235738d66359cb0a2707cb9cd90e90bb1f2d0d807aafbd048b1ddbb156d4984cfbbaa9a435b9230d213140dd5be64b5e594945d474665eaf5267fc598a5f75b99f83b029971b80c4149891d43abe62b95";
+//    bignum_from_string(&n2048,str7, 512);
+//    char str8[] = "9b78ea83264133684182400d5eaca6aec68330cc97176712f7f71f3758210f61df44f9beead78372753987922f2e0c75a480aa1edc95e9d65ad0da529ce044ef83b6ac03507125ae75c2dd61098ac9d54730d65fd21702278633dd8392549c18548f22ee100a92aca50d316da68131a897691dac22f77df57c96fa8ee1a7212db313396410a5c9c8a31f6f940724cff2b2db5eb078eedad92b6ff29a8636fcd370e99773e96168f34839693f84b7a083597bfbe0f674c79b2348b038ca730cada30bcf2dd9cde27dd555891d3cc10b7831b23e7cda163570635727f11d569492a201f55c56d9a92d46f71b6ecea30f28f8c040f834a2da43f72a1ec927df9441";
+//    bignum_from_string(&priv2,str8, 512);
+//    char str9[] = "4bd1a139b2ae5bcae58410ae32ce65e41ef226d30bb2d020e1cb02387f13985d4f18d154444954a4831a26870d1671f54f3ff87efe3adc66cac098160a524674740948d6ba8466054cce1d27018bbd5dd9c4b58def7d62cf8c0d6621ad846324b72a0414c56843075c4199c5963f55977ea6437a501afe3eebf150b1e2cbbbcde4be89e1ce8e72f8a334297224418e29ad44882a99ba59c5eb481b5faeeebd423b5bdab6c7edb288e8ab42f01a18cef3521c3cbcad8fde05e2f189070725c13716112b7497bb27250fe4141b41de67e0b0fe1763e0831ecf692ad1ff18e5f0186a6e7729ec84e7b9e2483838be73cbe4fea67fa186b329bfd1434dad528524a4";
+//    bignum_from_string(&v2Dec2,str9,512);
+//    char r2ms2[]= "10e2f70a5f5ea34371cd7f6d36ce95604746f2aa503bc45369201212a4006df4433827b085890ed3a614058df7af4caa9a988bb5cbc49179e0a4e76b046926b3f700532e1ed1d191985176c2cd7f9600f45eb96323d975060c44f06ef3bdaa220957e7905c5641276e7752e7e503f930cb49a4abe90cae46270a41e17964206bd6edaa7943b32237d2bfa4063060b388424944ec21c7c2f3bc29554214dee86c848116fc1fd28b60b0b438aa8bc8303c0788ea216bace026f78c09aa10b139a5ee415aa73888ac15157ab9a355eda90b7838e8cddb44a626d1c17a203eaf3c64be524f077df6892984a7198b9c3ba31228bb49259162572747ec51e5b49849ae";
+//    bignum_from_string(&r2m,r2ms2,512);
+//
+//    dBits = bignum_numbits(&priv2);
+//    nBits = bignum_numbits(&n2048);
+//
+//    bignum_init(&result);
+//
+//    clock_t before3 = clock();
+//    modExp(&v2Dec2, &priv2,dBits, &n2048, nBits, &r2m, &result);
+//    clock_t after3 = clock();
+//
+//    msec1 = (float)(after3 - before3) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("-------Test 4--------\n");
+//    printf(" RSA Keysize: %6d [bits]\n",dBits);
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("Time(no LUT):  %.5f [sec]\n", msec1);
+//    bignum_init(&result);
+//
+//    //genLUT(&v2Dec2, &n2048, nBits, &r2m);
+//    parseLUT(lutSeek, dBits+1);
+//    before3 = clock();
+//    modExpLUT(&v2Dec2, &priv2,dBits, &n2048, nBits, &r2m, &result);
+//    after3 = clock();
+//
+//    msec1 = (float)(after3 - before3) / CLOCKS_PER_SEC;
+//
+//    //print result and timing
+//    printf("  RSA Result: ");
+//    bignum_print(&result);
+//    printf("   Time(LUT):  %.5f [sec]\n", msec1);
+//
+//
+//
+//    return 0;
+//}
+//
+//
+//
+
+
+
+
+/* Program entry point: calls rsa_tests() and returns 0.
+   The calls to opencl_tests() and montgomery_test() are switched off and
+   kept below as comments. The command-line arguments are not used. */
+int main(int argc, char** argv)
+{
+    (void)argc;
+    (void)argv;
+
+    /* disabled: opencl_tests(); */
+    rsa_tests();
+    /* disabled: montgomery_test(); */
+
+    return 0;
+}
+ 
+ 
+
+//
+//
+//#define NEED_LIBGCRYPT_VERSION "1.10.2"
+//
+//int main(int argc, char** argv)
+//{
+//    
+//    gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL);
+//    
+//    /* Version check should be the very first call because it
+//     makes sure that important subsystems are initialized.
+//     #define NEED_LIBGCRYPT_VERSION to the minimum required version. */
+//    if (!gcry_check_version (NEED_LIBGCRYPT_VERSION))
+//    {
+//        fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n",
+//                 NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL));
+//        exit (2); }
+//    /* Disable secure memory.  */
+//    gcry_control (GCRYCTL_DISABLE_SECMEM, 0);
+//    /* ... If required, other initialization goes here.  */
+//    /* Tell Libgcrypt that initialization has completed. */
+//    gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);
+//    
+//   
+//    char *template = "(genkey(rsa(nbits 4:2048)))";
+//        gcry_sexp_t parms;
+//        
+//        gcry_sexp_new(&parms, template, strlen(template), 1);
+//        
+//        // contains key material
+//        gcry_sexp_t key;
+//        
+//        gcry_pk_genkey(&key,parms);
+//        
+//      //  show_sexp("\n", key);
+//        
+//        // create a large number, we want to encrypt it
+//        
+//        
+//        char *val = "1234567890ABCDEF";
+//        gcry_mpi_t mpi = gcry_mpi_new((int)strlen(val) * 8);
+//        size_t scanned = 0;
+//        
+//        gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned);
+//        
+//        gcry_sexp_t toSign;
+//        size_t errOff = 0;
+//        char *dataformat = "(data (flags raw) (value %m))";
+//        
+//        gcry_sexp_build(&toSign,&errOff,dataformat,mpi);
+//        
+//       // show_sexp("\n", toSign);
+//        
+//        // MARK: RSA verification
+//        
+//        // use gcry_rsa_sign  – without padding?
+//        gcry_sexp_t resSign;
+//        
+//      //  clock_t start_0 = clock();
+//        /*Do something*/
+//        
+//        gcry_pk_sign(&resSign, toSign, key);
+//        
+//       // clock_t end_0 = clock();
+//       // float seconds_0 = (float)(end_0 - start_0) / CLOCKS_PER_SEC;
+//        
+//       // printf("\nSign 0 (gcrpypt):  %f sec\n", seconds_0);
+//        
+//        
+//        
+//       // show_sexp("\n", resSign);
+//        
+//        // measure time
+//        
+//        // do the same thing with our bigNum library – do we even get the same signature?
+//        
+//        // these must be freed manually
+//        gcry_mpi_t n_mpi;
+//        gcry_mpi_t e_mpi;
+//        gcry_mpi_t d_mpi;
+//        
+//        gcry_sexp_extract_param(key,NULL,"n e d",&n_mpi, &e_mpi, &d_mpi, NULL);
+//        
+//        gcry_mpi_t sig_mpi;
+//        
+//        gcry_sexp_extract_param(resSign,NULL,"s",&sig_mpi, NULL);
+//        
+//        
+//        // may be a lot shorter – these will contain the numbers in HEX string form – for use in my bigNum
+//        unsigned char *n = malloc(2048);
+//        unsigned char *e = malloc(2048);
+//        unsigned char *d = malloc(2048);
+//        size_t nL = 0;
+//        
+//        // check returns
+//        gcry_mpi_print(GCRYMPI_FMT_HEX,n,2048,&nL,n_mpi);
+//        gcry_mpi_print(GCRYMPI_FMT_HEX,e,2048,&nL,e_mpi);
+//        gcry_mpi_print(GCRYMPI_FMT_HEX,d,2048,&nL,d_mpi);
+//        
+//        unsigned char *sgn = malloc(2048);
+//        gcry_mpi_print(GCRYMPI_FMT_HEX,sgn,2048,&nL,sig_mpi);
+//        
+//    
+//        test(e,d,n);
+//    
+//    
+//    return 0;
+//}
+//
+//
+//
+//
diff --git a/source/opencl-test.c b/source/opencl-test.c
@@ -0,0 +1,211 @@
+//
+//  opencl-test.c
+//  hello
+//
+//  Created by Cedric Zwahlen on 28.09.2023.
+//
+
+#include "opencl-test.h"
+
+#define DATA_SIZE (1024)
+
+/*
+ * Reads the whole file at `path` into a freshly malloc'ed, NUL-terminated
+ * buffer. Returns NULL if the file cannot be opened, sized, allocated or
+ * read. The caller owns the returned buffer and must free() it.
+ */
+static char *read_text_file(const char *path) {
+
+    FILE *fp = fopen(path, "r");
+    if (!fp)
+        return NULL;
+
+    char *buf = NULL;
+    long len = -1;
+
+    if (fseek(fp, 0L, SEEK_END) == 0)
+        len = ftell(fp);
+
+    if (len >= 0 && fseek(fp, 0L, SEEK_SET) == 0)
+        buf = malloc((size_t)len + 1);
+
+    if (buf)
+    {
+        size_t got = fread(buf, sizeof(char), (size_t)len, fp);
+        if (ferror(fp))
+        {
+            free(buf);
+            buf = NULL;
+        }
+        else
+        {
+            // terminate after the bytes actually read, so the buffer is a valid C string
+            buf[got] = '\0';
+        }
+    }
+
+    fclose(fp);
+    return buf;
+}
+
+/*
+ * OpenCL smoke test: fills a buffer with DATA_SIZE random floats, builds the
+ * program found in "rsa-kernel.cl", runs its "square" kernel on the first GPU
+ * device and counts how many results equal data[i] * data[i].
+ *
+ * Returns EXIT_SUCCESS after the run completed and EXIT_FAILURE when device,
+ * context, queue, kernel-source or program setup fails or the kernel cannot
+ * be enqueued. As before, failures while building the program, creating the
+ * kernel or buffers, setting arguments or transferring data terminate the
+ * process with exit(1).
+ */
+int opencl_tests(void) {
+    
+    int err;                            // error code returned from api calls
+      
+    float data[DATA_SIZE];              // original data set given to device
+    float results[DATA_SIZE];           // results returned from device
+    unsigned int correct;               // number of correct results returned
+
+    size_t global;                      // global domain size for our calculation
+    size_t local;                       // local domain size for our calculation
+
+    cl_device_id device_id;             // compute device id
+    cl_context context;                 // compute context
+    cl_command_queue commands;          // compute command queue
+    cl_program program;                 // compute program
+    cl_kernel kernel;                   // compute kernel
+    
+    cl_mem input;                       // device memory used for the input array
+    cl_mem output;                      // device memory used for the output array
+    
+    // Fill our data set with random float values
+    //
+    unsigned int i = 0;
+    unsigned int count = DATA_SIZE;
+    for(i = 0; i < count; i++)
+        data[i] = rand() / (float)RAND_MAX;
+    
+    // Connect to a compute device
+    //
+    int gpu = 1;
+    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to create a device group!\n");
+        return EXIT_FAILURE;
+    }
+    
+    // Print the driver version. The size query is checked first so that a
+    // failed query can never produce a zero-length array or print garbage.
+    //
+    size_t retSize = 0;
+    err = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, 0, NULL, &retSize);
+    if (err == CL_SUCCESS && retSize > 0)
+    {
+        char driver_version[retSize];
+        err = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, retSize, driver_version, NULL);
+        if (err == CL_SUCCESS)
+            printf("%s\n", driver_version);
+    }
+    
+  
+    // Create a compute context
+    //
+    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+    if (!context)
+    {
+        printf("Error: Failed to create a compute context!\n");
+        return EXIT_FAILURE;
+    }
+
+    // Create a command commands
+    //
+    commands = clCreateCommandQueue(context, device_id, 0, &err);
+    if (!commands)
+    {
+        printf("Error: Failed to create a command commands!\n");
+        return EXIT_FAILURE;
+    }
+    
+    // get the kernel from a file instead of a constant
+    //
+    char *kernelBuf = read_text_file("rsa-kernel.cl");
+    if (!kernelBuf)
+    {
+        printf("Error: Failed to read kernel source file!\n");
+        clReleaseCommandQueue(commands);
+        clReleaseContext(context);
+        return EXIT_FAILURE;
+    }
+
+    // Create the compute program from the source buffer. The lengths argument
+    // is NULL, so the source string must be NUL-terminated (read_text_file
+    // guarantees that).
+    //
+    //program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
+    program = clCreateProgramWithSource(context, 1, (const char **) & kernelBuf, NULL, &err);
+    
+    // the source text is loaded into the program object, the host copy is no longer needed
+    free(kernelBuf);
+    kernelBuf = NULL;
+    
+    if (!program)
+    {
+        printf("Error: Failed to create compute program!\n");
+        return EXIT_FAILURE;
+    }
+
+    // Build the program executable
+    //
+    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        size_t len;
+        char buffer[2048] = "";         // stays an empty string if the log cannot be fetched
+
+        printf("Error: Failed to build program executable!\n");
+        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        printf("%s\n", buffer);
+        exit(1);
+    }
+
+    // Create the compute kernel in the program we wish to run
+    //
+    kernel = clCreateKernel(program, "square", &err);
+    if (!kernel || err != CL_SUCCESS)
+    {
+        printf("Error: Failed to create compute kernel!\n");
+        exit(1);
+    }
+
+    // Create the input and output arrays in device memory for our calculation
+    //
+    input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, NULL);
+    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
+    if (!input || !output)
+    {
+        printf("Error: Failed to allocate device memory!\n");
+        exit(1);
+    }
+    
+    // Write our data set into the input array in device memory
+    //
+    err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to write to source array!\n");
+        exit(1);
+    }
+
+    // Set the arguments to our compute kernel
+    //
+    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
+    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
+    err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to set kernel arguments! %d\n", err);
+        exit(1);
+    }
+
+    // Get the maximum work group size for executing the kernel on the device
+    //
+    err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to retrieve kernel work group info! %d\n", err);
+        exit(1);
+    }
+
+    // Execute the kernel over the entire range of our 1d input data set
+    // using the maximum number of work group items for this device
+    //
+    global = count;
+    
+    // a work group can never be larger than the global range
+    if (local > global)
+        local = global;
+    
+    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
+    if (err)
+    {
+        printf("Error: Failed to execute kernel!\n");
+        return EXIT_FAILURE;
+    }
+
+    // Wait for the command commands to get serviced before reading back results
+    //
+    clFinish(commands);
+
+    // Read back the results from the device to verify the output
+    //
+    err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        printf("Error: Failed to read output array! %d\n", err);
+        exit(1);
+    }
+    
+    // Validate our results
+    //
+    correct = 0;
+    for(i = 0; i < count; i++)
+    {
+        if(results[i] == data[i] * data[i])
+            correct++;
+    }
+    
+    
+    // Print a brief summary detailing the results (both values are unsigned)
+    //
+    printf("Computed '%u/%u' correct values!\n", correct, count);
+    
+    // Shutdown and cleanup
+    //
+    clReleaseMemObject(input);
+    clReleaseMemObject(output);
+    clReleaseProgram(program);
+    clReleaseKernel(kernel);
+    clReleaseCommandQueue(commands);
+    clReleaseContext(context);
+    
+    return EXIT_SUCCESS;
+}
diff --git a/source/opencl-test.h b/source/opencl-test.h
@@ -0,0 +1,25 @@
+//
+//  opencl-test.h
+//  hello
+//
+//  Created by Cedric Zwahlen on 28.09.2023.
+//
+
+#ifndef opencl_test_h
+#define opencl_test_h
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <OpenCL/opencl.h>
+
+#include "big-int-test.h"
+
+/*
+ * OpenCL smoke test implemented in opencl-test.c: builds the program from
+ * "rsa-kernel.cl", runs its "square" kernel on DATA_SIZE random floats and
+ * prints how many results were correct. Returns EXIT_FAILURE on several
+ * setup failures; other failures terminate the process via exit(1).
+ */
+int opencl_tests(void);
+
+#endif /* opencl_test_h */
diff --git a/source/rsa-test.c b/source/rsa-test.c
@@ -0,0 +1,276 @@
+//
+//  rsa-test.c
+//  hello
+//
+//  Created by Cedric Zwahlen on 28.09.2023.
+//
+
+#include "rsa-test.h"
+#include "big-int-test.h"
+
+
+#include "ctype.h"
+#include "time.h"
+
+
+
+#define NEED_LIBGCRYPT_VERSION "1.10.2"
+
+
+
+/*
+ * Returns the time elapsed between *start and *end in microseconds.
+ * Both the seconds and the nanoseconds fields are taken into account, so the
+ * result stays correct when the interval crosses a second boundary.
+ */
+static double elapsed_micros(const struct timespec *start, const struct timespec *end) {
+    
+    return (double)(end->tv_sec - start->tv_sec) * 1e6
+         + (double)(end->tv_nsec - start->tv_nsec) / 1e3;
+}
+
+/*
+ * End-to-end RSA experiment:
+ *   1. initialises libgcrypt (hardware feature "intel-cpu" and secure memory disabled),
+ *   2. generates a 2048 bit RSA key and signs a fixed raw test value with it,
+ *   3. extracts n, e, d and the signature as hex strings,
+ *   4. runs bigNum_tests() and then times verify() against gcry_pk_verify().
+ *
+ * Any libgcrypt failure is reported on stderr and aborts the experiment.
+ * Everything acquired along the way is released before returning.
+ */
+void rsa_tests(void) {
+    
+    // size of the buffers that receive the hex representation of the MPIs
+    enum { HEX_BUF_SIZE = 2048 };
+    
+    // MARK: UNSAFE init
+    
+    // consider disabling optimizations, since they dont make for a fair comparison
+    
+    gcry_control (GCRYCTL_DISABLE_HWF, "intel-cpu", NULL);
+    
+    /* Version check should be the very first call because it
+     makes sure that important subsystems are initialized.
+     #define NEED_LIBGCRYPT_VERSION to the minimum required version. */
+    if (!gcry_check_version (NEED_LIBGCRYPT_VERSION))
+    {
+        fprintf (stderr, "libgcrypt is too old (need %s, have %s)\n",
+                 NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL));
+        exit (2);
+    }
+    /* Disable secure memory.  */
+    gcry_control (GCRYCTL_DISABLE_SECMEM, 0);
+    /* ... If required, other initialization goes here.  */
+    /* Tell Libgcrypt that initialization has completed. */
+    gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);
+    
+    
+    // Every handle starts out as NULL so that the single cleanup path below
+    // can release whatever has been acquired up to the point of failure.
+    gcry_sexp_t parms = NULL;
+    gcry_sexp_t key = NULL;             // contains key material
+    gcry_sexp_t toSign = NULL;
+    gcry_sexp_t resSign = NULL;
+    
+    gcry_mpi_t mpi = NULL;
+    gcry_mpi_t n_mpi = NULL;
+    gcry_mpi_t e_mpi = NULL;
+    gcry_mpi_t d_mpi = NULL;
+    gcry_mpi_t sig_mpi = NULL;
+    
+    // these will contain the numbers in HEX string form – for use in my bigNum
+    unsigned char *n = NULL;
+    unsigned char *e = NULL;
+    unsigned char *d = NULL;
+    unsigned char *sgn = NULL;
+    
+    gcry_error_t err = 0;
+    size_t scanned = 0;
+    size_t errOff = 0;
+    size_t nL = 0;
+    struct timespec t1, t2;
+    
+    char *template = "(genkey(rsa(nbits 4:2048)))";
+    char *val = "1234567890ABCDEF1234567890969";
+    char *dataformat = "(data (flags raw) (value %m))";
+    
+    err = gcry_sexp_new(&parms, template, strlen(template), 1);
+    if (err)
+        goto cleanup;
+    
+    err = gcry_pk_genkey(&key,parms);
+    if (err)
+        goto cleanup;
+    
+  //  show_sexp("\n", key);
+    
+    // create a large number, we want to encrypt it
+    // (gcry_mpi_scan allocates the MPI itself, so nothing is pre-allocated here)
+    err = gcry_mpi_scan(&mpi, GCRYMPI_FMT_HEX, val, 0, &scanned);
+    if (err)
+        goto cleanup;
+    
+    err = gcry_sexp_build(&toSign,&errOff,dataformat,mpi);
+    if (err)
+        goto cleanup;
+    
+   // show_sexp("\n", toSign);
+    
+    // MARK: RSA verification
+    
+    // use gcry_rsa_sign  – without padding?
+    err = gcry_pk_sign(&resSign, toSign, key);
+    if (err)
+        goto cleanup;
+    
+   // show_sexp("\n", resSign);
+    
+    // do the same thing with our bigNum library – do we even get the same signature?
+    
+    err = gcry_sexp_extract_param(key,NULL,"n e d",&n_mpi, &e_mpi, &d_mpi, NULL);
+    if (err)
+        goto cleanup;
+    
+    err = gcry_sexp_extract_param(resSign,NULL,"s",&sig_mpi, NULL);
+    if (err)
+        goto cleanup;
+    
+    n = malloc(HEX_BUF_SIZE);
+    e = malloc(HEX_BUF_SIZE);
+    d = malloc(HEX_BUF_SIZE);
+    sgn = malloc(HEX_BUF_SIZE);
+    if (!n || !e || !d || !sgn)
+    {
+        fprintf(stderr, "out of memory\n");
+        goto cleanup;
+    }
+    
+    err = gcry_mpi_print(GCRYMPI_FMT_HEX,n,HEX_BUF_SIZE,&nL,n_mpi);
+    if (!err)
+        err = gcry_mpi_print(GCRYMPI_FMT_HEX,e,HEX_BUF_SIZE,&nL,e_mpi);
+    if (!err)
+        err = gcry_mpi_print(GCRYMPI_FMT_HEX,d,HEX_BUF_SIZE,&nL,d_mpi);
+    if (!err)
+        err = gcry_mpi_print(GCRYMPI_FMT_HEX,sgn,HEX_BUF_SIZE,&nL,sig_mpi);
+    if (err)
+        goto cleanup;
+    
+  //  printf("%s",n);
+    
+    bigNum_tests(n, (unsigned char *)val, d);
+    
+    // use 'real time' – not clock time
+    clock_gettime(CLOCK_REALTIME, &t1);
+    
+    if (verify(sgn, e, n, (unsigned char *)val)) {
+        
+        printf("\nverification failed\n");
+        
+    } else {
+        
+        printf("\nverification successful\n");
+        
+    }
+    
+    clock_gettime(CLOCK_REALTIME, &t2);
+    
+    double seconds_1 = elapsed_micros(&t1, &t2);
+    
+    printf("\nSign 1 (my algorithm):  %f micro seconds\n", seconds_1);
+    
+    
+    clock_gettime(CLOCK_REALTIME, &t1);
+    // maybe we want to do more here
+    if (gcry_pk_verify(resSign, toSign, key)) {
+        
+        printf("\nverification failed\n");
+        
+    } else {
+        
+        printf("\nverification successful\n");
+        
+    }
+    
+    clock_gettime(CLOCK_REALTIME, &t2);
+    double seconds_0 = elapsed_micros(&t1, &t2);
+    printf("\nSign 0 (gcrpypt):  %f micro seconds\n", seconds_0);
+    
+    // try to put it onto the gpu
+    
+cleanup:
+    if (err)
+        fprintf(stderr, "libgcrypt error: %s\n", gcry_strerror(err));
+    
+    free(sgn);
+    free(d);
+    free(e);
+    free(n);
+    
+    gcry_mpi_release(sig_mpi);
+    gcry_mpi_release(d_mpi);
+    gcry_mpi_release(e_mpi);
+    gcry_mpi_release(n_mpi);
+    gcry_mpi_release(mpi);
+    
+    gcry_sexp_release(resSign);
+    gcry_sexp_release(toSign);
+    gcry_sexp_release(key);
+    gcry_sexp_release(parms);
+    
+    return;
+    
+}
+
+
+//void bigNum_tests(void) {
+/*
+ * Big number experiment: parses the three hex strings nn, ee and dd into
+ * fixed-size digit arrays and calls mpModExpO(res, e, d, N, ...) on them.
+ * The result is only measured with mpSizeof(); printing it is commented out.
+ */
+void bigNum_tests(unsigned char* nn,unsigned char* ee,unsigned char* dd) {
+    
+    
+    // MARK: BIG NUM TESTs
+    
+    // operands, each cleared before use
+    DIGIT_T modulus [MAX_ALLOC_SIZE*2];
+    mpSetZero(modulus, MAX_ALLOC_SIZE*2);
+    
+    DIGIT_T base [MAX_ALLOC_SIZE*2];
+    mpSetZero(base, MAX_ALLOC_SIZE*2);
+    
+    DIGIT_T exponent [MAX_ALLOC_SIZE*2];
+    mpSetZero(exponent, MAX_ALLOC_SIZE*2);
+    
+    // receives the result of the modular exponentiation
+    DIGIT_T res [MAX_ALLOC_SIZE*2];
+    mpSetZero(res, MAX_ALLOC_SIZE*2);
+    
+    /*
+    
+    char* nn = "E08973398DD8F5F5E88776397F4EB005BB5383DE0FB7ABDC7DC775290D052E6D12DFA68626D4D26FAA5829FC97ECFA82510F3080BEB1509E4644F12CBBD832CFC6686F07D9B060ACBEEE34096A13F5F7050593DF5EBA3556D961FF197FC981E6F86CEA874070EFAC6D2C749F2DFA553AB9997702A648528C4EF357385774575F";
+    
+    char* ee = "010001";
+    
+    char* dd = "A403C327477634346CA686B57949014B2E8AD2C862B2C7D748096A8B91F736F275D6E8CD15906027314735644D95CD6763CEB49F56AC2F376E1CEE0EBF282DF439906F34D86E085BD5656AD841F313D72D395EFE33CBFF29E4030B3D05A28FB7F18EA27637B07957D32F2BDE8706227D04665EC91BAF8B1AC3EC9144AB7F21";
+    */
+    
+    // NOTE(review): the second argument is the length of the hex string –
+    // confirm that this is what mpConvFromHex expects and that it can never
+    // exceed MAX_ALLOC_SIZE*2.
+    mpConvFromHex(modulus, strlen((const char *)nn), nn);
+    mpConvFromHex(base, strlen((const char *)ee), ee);
+    mpConvFromHex(exponent, strlen((const char *)dd), dd);
+    
+    size_t modulus_len = mpSizeof(modulus, MAX_ALLOC_SIZE*2);
+    size_t exponent_len = mpSizeof(exponent, MAX_ALLOC_SIZE*2);
+    
+    //mpModMult(res, e, d, N, max(sz_d,sz_n)); // that works :)
+    
+    mpModExpO(res, base, exponent, modulus, max(exponent_len,modulus_len));
+    
+    size_t sz_res = mpSizeof(res, MAX_ALLOC_SIZE*2);
+   
+    char* pref = "\nHEX:\n";
+    char* suf = "\n";
+   // mpPrintHex(pref, res, sz_res, suf);
+    
+}
+
+/*
+ * Checks a signature with the big number routines.
+ *
+ * sign, ee, nn and mm are NUL-terminated hex strings (signature, exponent,
+ * modulus and expected value). The function calls mpModExpO(res, s, e, N, ...),
+ * converts res to an upper-cased hex string and compares it with mm.
+ *
+ * Returns the strcmp() result of that comparison: 0 when the strings are
+ * equal, non-zero otherwise. mm itself is not case-normalised.
+ */
+int verify(unsigned char* sign, unsigned char* ee, unsigned char* nn, unsigned char* mm) {
+    
+    // operands, each cleared before use
+    DIGIT_T modulus [MAX_ALLOC_SIZE*2];
+    mpSetZero(modulus, MAX_ALLOC_SIZE*2);
+    
+    DIGIT_T exponent [MAX_ALLOC_SIZE*2];
+    mpSetZero(exponent, MAX_ALLOC_SIZE*2);
+    
+    DIGIT_T signature [MAX_ALLOC_SIZE*2];
+    mpSetZero(signature, MAX_ALLOC_SIZE*2);
+    
+    // receives the result of the modular exponentiation
+    DIGIT_T res [MAX_ALLOC_SIZE*2];
+    mpSetZero(res, MAX_ALLOC_SIZE*2);
+  
+    // NOTE(review): the second argument is the length of the hex string –
+    // confirm that this is what mpConvFromHex expects and that it can never
+    // exceed MAX_ALLOC_SIZE*2.
+    mpConvFromHex(modulus, strlen((const char *)nn), nn);
+    mpConvFromHex(exponent, strlen((const char *)ee), ee);
+    mpConvFromHex(signature, strlen((const char *)sign), sign);
+    
+    size_t modulus_len = mpSizeof(modulus, MAX_ALLOC_SIZE*2);
+    size_t signature_len = mpSizeof(signature, MAX_ALLOC_SIZE*2);
+    
+    //mpModMult(res, e, d, N, max(sz_d,sz_n)); // that works :)
+    
+    mpModExpO(res, signature, exponent, modulus, max(signature_len,modulus_len));
+    
+    size_t res_len = mpSizeof(res, MAX_ALLOC_SIZE*2);
+    
+    // the hex buffer is sized two bytes larger than the length of mm
+    int hex_cap = strlen((const char *)mm) + 2;
+    unsigned char hex[hex_cap];
+    
+    mpConvToHex(res, res_len, hex, hex_cap);
+    
+    upper(hex);
+    
+    return strcmp((const char *)hex, (const char *)mm);
+}
+
+/*
+ * Debug helper: writes the optional prefix, followed by the S-expression a
+ * rendered with GCRYSEXP_FMT_ADVANCED, to stderr.
+ */
+static void show_sexp(const char *prefix, gcry_sexp_t a) {
+    if (prefix != NULL) {
+        fputs(prefix, stderr);
+    }
+
+    // the first call gets a NULL buffer; its return value sizes the allocation
+    size_t needed = gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, NULL, 0);
+    char *text = gcry_xmalloc(needed);
+
+    gcry_sexp_sprint(a, GCRYSEXP_FMT_ADVANCED, text, needed);
+    fprintf(stderr, "%.*s", (int) needed, text);
+
+    gcry_free(text);
+}
+
+/*
+ * Converts the NUL-terminated string str to upper case in place,
+ * one byte at a time via toupper().
+ */
+static void upper(unsigned char* str) {
+    
+    unsigned char *cursor = str;
+    
+    while (*cursor) {
+        *cursor = toupper(*cursor);
+        cursor++;
+    }
+    
+}
diff --git a/source/rsa-test.h b/source/rsa-test.h
@@ -0,0 +1,27 @@
+//
+//  rsa-test.h
+//  hello
+//
+//  Created by Cedric Zwahlen on 28.09.2023.
+//
+
+#ifndef rsa_test_h
+#define rsa_test_h
+
+#include <stdio.h>
+
+#include <gcrypt.h>
+
+/*
+ * Generates a 2048 bit RSA key with libgcrypt, signs a fixed test value and
+ * compares/timestamps verify() against gcry_pk_verify(). Prints to stdout.
+ */
+void rsa_tests(void);
+
+/*
+ * Writes prefix (if not NULL) and the S-expression a to stderr.
+ * NOTE(review): declared static in a header – every file including this
+ * header gets its own declaration; the definition lives in rsa-test.c.
+ */
+static void show_sexp(const char *prefix, gcry_sexp_t a);
+
+/*
+ * Parses the hex strings n, e and d into big numbers and runs mpModExpO on
+ * them; the result is not returned or printed.
+ */
+void bigNum_tests(unsigned char* n, unsigned char* e, unsigned char* d);
+
+/* NOTE(review): no definition is visible in rsa-test.c – confirm where this is implemented. */
+void montgomery_test(void);
+
+/*
+ * Checks the hex-encoded signature sign against the expected hex string mm
+ * using exponent ee and modulus nn. Returns 0 when the computed value matches
+ * mm, non-zero otherwise (the result of strcmp()).
+ */
+int verify(unsigned char* sign, unsigned char* ee, unsigned char* nn, unsigned char* mm);
+
+/*
+ * Upper-cases the NUL-terminated string str in place.
+ * NOTE(review): declared static in a header, defined in rsa-test.c.
+ */
+static void upper(unsigned char* str);
+
+#endif /* rsa_test_h */
diff --git a/xcode/.DS_Store b/xcode/.DS_Store
Binary files differ.
diff --git a/xcode/ReadMe.txt b/xcode/ReadMe.txt
@@ -0,0 +1 @@
+### OpenCL Hello World Example ### =========================================================================== DESCRIPTION: A simple "Hello World" compute example showing basic usage of OpenCL which calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of floating point values. For simplicity, this example is intended to be run from the command line. If run from within XCode, open the Run Log (Command-Shift-R) to see the output.  Alternatively, run the applications from within a Terminal.app session to launch from the command line. =========================================================================== BUILD REQUIREMENTS: Mac OS X v10.6 or later =========================================================================== RUNTIME REQUIREMENTS: Mac OS X v10.6 or later. To use the GPU as a compute device, use one of the following devices: - MacBook Pro w/NVidia GeForce 8600M - Mac Pro w/NVidia GeForce 8800GT =========================================================================== PACKAGING LIST: ReadMe.txt, hello.c, hello.xcodeproj =========================================================================== CHANGES FROM PREVIOUS VERSIONS: Version 1.0 - First version. =========================================================================== Copyright (C) 2008 Apple Inc. All rights reserved.
+\ No newline at end of file
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.pbxproj b/xcode/lib-gpu-verify.xcodeproj/project.pbxproj
@@ -0,0 +1,328 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 54;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		6A8A795D2A89357400116D7D /* rsa-kernel.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795C2A89357400116D7D /* rsa-kernel.cl */; };
+		6A8A795F2A89672700116D7D /* modexp.cl in Sources */ = {isa = PBXBuildFile; fileRef = 6A8A795E2A89672700116D7D /* modexp.cl */; };
+		6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */; };
+		6AF748822ADADF4500D58E08 /* big-int-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487D2ADADF4500D58E08 /* big-int-test.c */; };
+		6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF7487F2ADADF4500D58E08 /* rsa-test.c */; };
+		6AF748862ADADFAD00D58E08 /* opencl-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 6AF748852ADADFAD00D58E08 /* opencl-test.c */; };
+		C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C3770EFC0E6F1138009A5A77 /* OpenCL.framework */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		C39444690DAFF5A0008FFE68 /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "";
+			dstSubfolderSpec = 16;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "lib-gpu-verify"; sourceTree = BUILT_PRODUCTS_DIR; };
+		6A8A795C2A89357400116D7D /* rsa-kernel.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = "rsa-kernel.cl"; sourceTree = "<group>"; };
+		6A8A795E2A89672700116D7D /* modexp.cl */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.opencl; path = modexp.cl; sourceTree = "<group>"; };
+		6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "lib-gpu-verify.c"; path = "../source/lib-gpu-verify.c"; sourceTree = "<group>"; };
+		6AF7487B2ADADF4500D58E08 /* big-int-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "big-int-test.h"; path = "../source/big-int-test.h"; sourceTree = "<group>"; };
+		6AF7487D2ADADF4500D58E08 /* big-int-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "big-int-test.c"; path = "../source/big-int-test.c"; sourceTree = "<group>"; };
+		6AF7487F2ADADF4500D58E08 /* rsa-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "rsa-test.c"; path = "../source/rsa-test.c"; sourceTree = "<group>"; };
+		6AF748802ADADF4500D58E08 /* rsa-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "rsa-test.h"; path = "../source/rsa-test.h"; sourceTree = "<group>"; };
+		6AF748842ADADFAD00D58E08 /* opencl-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "opencl-test.h"; path = "../source/opencl-test.h"; sourceTree = "<group>"; };
+		6AF748852ADADFAD00D58E08 /* opencl-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "opencl-test.c"; path = "../source/opencl-test.c"; sourceTree = "<group>"; };
+		C3770EFC0E6F1138009A5A77 /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = "<absolute>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		466E0F5D0C932E1A00ED01DB /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C3770EFD0E6F1138009A5A77 /* OpenCL.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		466E0F490C93291B00ED01DB = {
+			isa = PBXGroup;
+			children = (
+				C3770EF30E6F10CF009A5A77 /* Frameworks */,
+				C3770EF10E6F10BB009A5A77 /* Sources */,
+				466E0F600C932E1A00ED01DB /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		466E0F600C932E1A00ED01DB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		6A984F162AC5B18A00F530FD /* Headers */ = {
+			isa = PBXGroup;
+			children = (
+				6AF748802ADADF4500D58E08 /* rsa-test.h */,
+				6AF7487B2ADADF4500D58E08 /* big-int-test.h */,
+				6AF748842ADADFAD00D58E08 /* opencl-test.h */,
+			);
+			name = Headers;
+			sourceTree = "<group>";
+		};
+		C3770EF10E6F10BB009A5A77 /* Sources */ = {
+			isa = PBXGroup;
+			children = (
+				6A984F162AC5B18A00F530FD /* Headers */,
+				6A8A795C2A89357400116D7D /* rsa-kernel.cl */,
+				6A8A795E2A89672700116D7D /* modexp.cl */,
+				6AF748792ADADEBD00D58E08 /* lib-gpu-verify.c */,
+				6AF7487D2ADADF4500D58E08 /* big-int-test.c */,
+				6AF7487F2ADADF4500D58E08 /* rsa-test.c */,
+				6AF748852ADADFAD00D58E08 /* opencl-test.c */,
+			);
+			name = Sources;
+			sourceTree = "<group>";
+		};
+		C3770EF30E6F10CF009A5A77 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				C3770EFC0E6F1138009A5A77 /* OpenCL.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		466E0F5E0C932E1A00ED01DB /* lib-gpu-verify */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 466E0F640C932E1A00ED01DB /* Build configuration list for PBXNativeTarget "lib-gpu-verify" */;
+			buildPhases = (
+				466E0F5C0C932E1A00ED01DB /* Sources */,
+				466E0F5D0C932E1A00ED01DB /* Frameworks */,
+				C39444690DAFF5A0008FFE68 /* CopyFiles */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "lib-gpu-verify";
+			productName = hello;
+			productReference = 466E0F5F0C932E1A00ED01DB /* lib-gpu-verify */;
+			productType = "com.apple.product-type.tool";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		466E0F4B0C93291B00ED01DB /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				BuildIndependentTargetsInParallel = YES;
+				LastUpgradeCheck = 1500;
+			};
+			buildConfigurationList = 466E0F4E0C93291B00ED01DB /* Build configuration list for PBXProject "lib-gpu-verify" */;
+			compatibilityVersion = "Xcode 2.4";
+			developmentRegion = English;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				English,
+				Japanese,
+				French,
+				German,
+			);
+			mainGroup = 466E0F490C93291B00ED01DB;
+			productRefGroup = 466E0F600C932E1A00ED01DB /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				466E0F5E0C932E1A00ED01DB /* lib-gpu-verify */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+		466E0F5C0C932E1A00ED01DB /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				6AF7487A2ADADEBD00D58E08 /* lib-gpu-verify.c in Sources */,
+				6A8A795D2A89357400116D7D /* rsa-kernel.cl in Sources */,
+				6A8A795F2A89672700116D7D /* modexp.cl in Sources */,
+				6AF748832ADADF4500D58E08 /* rsa-test.c in Sources */,
+				6AF748862ADADFAD00D58E08 /* opencl-test.c in Sources */,
+				6AF748822ADADF4500D58E08 /* big-int-test.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		466E0F4C0C93291B00ED01DB /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = NO;
+				DEAD_CODE_STRIPPING = YES;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = ../../inc;
+				LIBRARY_SEARCH_PATHS = ../../lib;
+				ONLY_ACTIVE_ARCH = YES;
+			};
+			name = Debug;
+		};
+		466E0F4D0C93291B00ED01DB /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = YES;
+				DEAD_CODE_STRIPPING = YES;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				HEADER_SEARCH_PATHS = ../../inc;
+				LIBRARY_SEARCH_PATHS = ../../lib;
+			};
+			name = Release;
+		};
+		466E0F620C932E1A00ED01DB /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				COPY_PHASE_STRIP = NO;
+				DEAD_CODE_STRIPPING = YES;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				HEADER_SEARCH_PATHS = (
+					../../inc,
+					/usr/local/include,
+				);
+				INSTALL_PATH = /usr/local/bin;
+				LIBRARY_SEARCH_PATHS = (
+					"$(inherited)",
+					"$(LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1)",
+				);
+				LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1 = "\"$(SRCROOT)/../lib\" \"/usr/local/lib\"";
+				MACOSX_DEPLOYMENT_TARGET = 13.5;
+				OTHER_LDFLAGS = "-lgcrypt";
+				PRODUCT_NAME = "lib-gpu-verify";
+				SYSTEM_FRAMEWORK_SEARCH_PATHS = "";
+				USE_HEADERMAP = NO;
+				ZERO_LINK = YES;
+			};
+			name = Debug;
+		};
+		466E0F630C932E1A00ED01DB /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				COPY_PHASE_STRIP = YES;
+				DEAD_CODE_STRIPPING = YES;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				GCC_MODEL_TUNING = G5;
+				HEADER_SEARCH_PATHS = (
+					../../inc,
+					/usr/local/include,
+				);
+				INSTALL_PATH = /usr/local/bin;
+				LIBRARY_SEARCH_PATHS = (
+					"$(inherited)",
+					"$(LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1)",
+				);
+				LIBRARY_SEARCH_PATHS_QUOTED_FOR_TARGET_1 = "\"$(SRCROOT)/../lib\" \"/usr/local/lib\"";
+				MACOSX_DEPLOYMENT_TARGET = 13.5;
+				OTHER_LDFLAGS = "-lgcrypt";
+				PRODUCT_NAME = "lib-gpu-verify";
+				USE_HEADERMAP = NO;
+				ZERO_LINK = NO;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		466E0F4E0C93291B00ED01DB /* Build configuration list for PBXProject "lib-gpu-verify" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				466E0F4C0C93291B00ED01DB /* Debug */,
+				466E0F4D0C93291B00ED01DB /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		466E0F640C932E1A00ED01DB /* Build configuration list for PBXNativeTarget "lib-gpu-verify" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				466E0F620C932E1A00ED01DB /* Debug */,
+				466E0F630C932E1A00ED01DB /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 466E0F4B0C93291B00ED01DB /* Project object */;
+}
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:/Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/lib-gpu-verify.xcodeproj">
+   </FileRef>
+</Workspace>
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate
Binary files differ.
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme b/xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "1500"
+   version = "1.7">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB"
+               BuildableName = "lib-gpu-verify"
+               BlueprintName = "lib-gpu-verify"
+               ReferencedContainer = "container:lib-gpu-verify.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      shouldAutocreateTestPlan = "YES">
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "YES"
+      customWorkingDirectory = "/Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES"
+      viewDebuggingEnabled = "No">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB"
+            BuildableName = "lib-gpu-verify"
+            BlueprintName = "lib-gpu-verify"
+            ReferencedContainer = "container:lib-gpu-verify.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "466E0F5E0C932E1A00ED01DB"
+            BuildableName = "lib-gpu-verify"
+            BlueprintName = "lib-gpu-verify"
+            ReferencedContainer = "container:lib-gpu-verify.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
@@ -0,0 +1,684 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Bucket
+   uuid = "6398D026-4CA4-45B6-AB2F-69FCDCABE312"
+   type = "1"
+   version = "2.0">
+   <Breakpoints>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "hello.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "474"
+            endingLineNumber = "474"
+            landmarkName = "main(argc, argv)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8 - 221db7784ae30d96"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "main"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/hello.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "339"
+                  endingLineNumber = "339"
+                  offsetFromSymbolStart = "2771">
+               </Location>
+               <Location
+                  uuid = "B1F33327-CFAB-4924-80EE-C04139FE2BA8 - 221db7784ae30d96"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "main"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/hello.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "339"
+                  endingLineNumber = "339"
+                  offsetFromSymbolStart = "2757">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "rsa-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "65"
+            endingLineNumber = "65"
+            landmarkName = "rsa_tests()"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298 - f37c0f4c30700625"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "56"
+                  endingLineNumber = "56"
+                  offsetFromSymbolStart = "246">
+               </Location>
+               <Location
+                  uuid = "ABCAF350-AA73-4A6C-8638-AE4D8D9AF298 - f37c0f4c307007c6"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "57"
+                  endingLineNumber = "57"
+                  offsetFromSymbolStart = "270">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "rsa-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "173"
+            endingLineNumber = "173"
+            landmarkName = "rsa_tests()"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - 706f0ea9b024a22d"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "bigNum_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "111"
+                  endingLineNumber = "111"
+                  offsetFromSymbolStart = "31">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700c50"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "99"
+                  endingLineNumber = "99"
+                  offsetFromSymbolStart = "584">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700d48"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "107"
+                  endingLineNumber = "107"
+                  offsetFromSymbolStart = "601">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700d69"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "108"
+                  endingLineNumber = "108"
+                  offsetFromSymbolStart = "601">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c30700f86"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "121"
+                  endingLineNumber = "121"
+                  offsetFromSymbolStart = "642">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009f4"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "135"
+                  endingLineNumber = "135"
+                  offsetFromSymbolStart = "794">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009f4"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "135"
+                  endingLineNumber = "135"
+                  offsetFromSymbolStart = "797">
+               </Location>
+               <Location
+                  uuid = "EEB8911E-E203-4453-B41E-78BD7D8FC6A9 - f37c0f4c307009b6"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "rsa_tests"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "137"
+                  endingLineNumber = "137"
+                  offsetFromSymbolStart = "797">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5"
+            shouldBeEnabled = "Yes"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "rsa-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "260"
+            endingLineNumber = "260"
+            landmarkName = "verify(sign, ee, nn, mm)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0b39"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "214"
+                  endingLineNumber = "214"
+                  offsetFromSymbolStart = "492">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0bbd"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "218"
+                  endingLineNumber = "218"
+                  offsetFromSymbolStart = "523">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0401"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "222"
+                  endingLineNumber = "222"
+                  offsetFromSymbolStart = "535">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e07bc"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "249"
+                  endingLineNumber = "249"
+                  offsetFromSymbolStart = "535">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e00c6"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "255"
+                  endingLineNumber = "255"
+                  offsetFromSymbolStart = "535">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e014a"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "259"
+                  endingLineNumber = "259"
+                  offsetFromSymbolStart = "535">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0108"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "261"
+                  endingLineNumber = "261"
+                  offsetFromSymbolStart = "610">
+               </Location>
+               <Location
+                  uuid = "BE5A95CC-4FA1-4426-8251-E2724903FEE5 - 93446d8fed0e0108"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "verify"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/rsa-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "261"
+                  endingLineNumber = "261"
+                  offsetFromSymbolStart = "621">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E"
+            shouldBeEnabled = "Yes"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "3083"
+            endingLineNumber = "3083"
+            landmarkName = "test(aa, pp, mm)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E - a07591907a67b234"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "test"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "3010"
+                  endingLineNumber = "3010"
+                  offsetFromSymbolStart = "196">
+               </Location>
+               <Location
+                  uuid = "F515B97A-2DDB-4CD7-B177-0930CB82886E - a07591907a67bb4f"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "test"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "3083"
+                  endingLineNumber = "3083"
+                  offsetFromSymbolStart = "196">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "3081"
+            endingLineNumber = "3081"
+            landmarkName = "test(aa, pp, mm)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D - a07591907a67b5fa"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "test"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "3008"
+                  endingLineNumber = "3008"
+                  offsetFromSymbolStart = "151">
+               </Location>
+               <Location
+                  uuid = "CDA3CB38-3A0F-4B51-B1BA-98E3DCC1FB9D - a07591907a67bb0d"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "test"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "3081"
+                  endingLineNumber = "3081"
+                  offsetFromSymbolStart = "151">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "A8352521-4868-49CB-8637-0BE665EE874C"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "2917"
+            endingLineNumber = "2917"
+            landmarkName = "BN_mod_exp_mont(rr, a, p, m, ctx, in_mont)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "A8352521-4868-49CB-8637-0BE665EE874C - 16f55520f2fdcffa"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "BN_mod_exp_mont"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "2845"
+                  endingLineNumber = "2845"
+                  offsetFromSymbolStart = "2068">
+               </Location>
+               <Location
+                  uuid = "A8352521-4868-49CB-8637-0BE665EE874C - 16f55520f2fc3913"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "BN_mod_exp_mont"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "2918"
+                  endingLineNumber = "2918"
+                  offsetFromSymbolStart = "2068">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "0C98EE14-2401-420F-8D20-F49ED0DBC41B"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "332"
+            endingLineNumber = "332"
+            landmarkName = "BN_MONT_CTX_new()"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "2594"
+            endingLineNumber = "2594"
+            landmarkName = "BN_MONT_CTX_set(mont, mod, ctx)"
+            landmarkType = "9">
+            <Locations>
+               <Location
+                  uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B - 1aaa91d4f9a1e20c"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "BN_MONT_CTX_set"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "2521"
+                  endingLineNumber = "2521"
+                  offsetFromSymbolStart = "20">
+               </Location>
+               <Location
+                  uuid = "96C72BD4-E8C9-412E-9AB7-413108288B0B - 1aaa91d4f9a1e965"
+                  shouldBeEnabled = "Yes"
+                  ignoreCount = "0"
+                  continueAfterRunningActions = "No"
+                  symbolName = "BN_MONT_CTX_set"
+                  moduleName = "hello"
+                  usesParentBreakpointCondition = "Yes"
+                  urlString = "file:///Users/cedriczwahlen/Desktop/OpenCL_Hello_World_Example/openssl-test.c"
+                  startingColumnNumber = "9223372036854775807"
+                  endingColumnNumber = "9223372036854775807"
+                  startingLineNumber = "2594"
+                  endingLineNumber = "2594"
+                  offsetFromSymbolStart = "37">
+               </Location>
+            </Locations>
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "4C95F997-80D7-4841-B985-AC5D4BB9A5EF"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "2485"
+            endingLineNumber = "2485"
+            landmarkName = "BN_mod_inverse(in, a, n, ctx)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "CC868260-8B6B-4BA4-AC45-233E3DE02D02"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "2442"
+            endingLineNumber = "2442"
+            landmarkName = "int_bn_mod_inverse(in, a, n, ctx, pnoinv)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "49F72DAD-C98D-426E-9106-693BED1DC57C"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "1080"
+            endingLineNumber = "1080"
+            landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "1DB7672F-C154-4530-8338-1D40045A83BB"
+            shouldBeEnabled = "Yes"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "1099"
+            endingLineNumber = "1099"
+            landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "92922D1E-77A7-417A-A75F-7C60A90557C3"
+            shouldBeEnabled = "Yes"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "1110"
+            endingLineNumber = "1110"
+            landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+      <BreakpointProxy
+         BreakpointExtensionID = "Xcode.Breakpoint.FileBreakpoint">
+         <BreakpointContent
+            uuid = "30F57658-A9E6-4742-B54A-BC31A7EEC6B1"
+            shouldBeEnabled = "No"
+            ignoreCount = "0"
+            continueAfterRunningActions = "No"
+            filePath = "openssl-test.c"
+            startingColumnNumber = "9223372036854775807"
+            endingColumnNumber = "9223372036854775807"
+            startingLineNumber = "1040"
+            endingLineNumber = "1040"
+            landmarkName = "bn_div_fixed_top(dv, rm, num, divisor, ctx)"
+            landmarkType = "9">
+         </BreakpointContent>
+      </BreakpointProxy>
+   </Breakpoints>
+</Bucket>
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>lib-gpu-verify.xcscheme_^#shared#^_</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>466E0F5E0C932E1A00ED01DB</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
+</dict>
+</plist>
diff --git a/xcode/modexp.cl b/xcode/modexp.cl
@@ -0,0 +1,5 @@
+__kernel void verify() {
+    /* Empty kernel: takes no arguments and performs no work. */
+}
+
+
diff --git a/xcode/rsa-kernel.cl b/xcode/rsa-kernel.cl
@@ -0,0 +1,9 @@
+/* square: each work-item with global id i < count writes output[i] = input[i] * input[i]. */
+/* Work-items with i >= count write nothing; input/output must hold at least count floats.  */
+__kernel void square(__global float* input, __global float* output, const unsigned int count)
+{
+    size_t i = get_global_id(0); /* size_t is get_global_id's return type; int would narrow it and compare mixed-sign */
+    
+    if(i < count)
+        output[i] = input[i] * input[i];
+}

	libgpuverify Signature verification on GPUs (WiP)
	Log \| Files \| Refs \| README \| LICENSE

A	.DS_Store	\|	0
A	.gitignore	\|	5	+++++
A	openssl/CMakeLists.txt	\|	28	++++++++++++++++++++++++++++
A	openssl/main.c	\|	93	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	openssl/openssl-test.c	\|	3087	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	openssl/openssl-test.h	\|	531	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	openssl/readme.txt	\|	25	+++++++++++++++++++++++++
A	other/.DS_Store	\|	0
A	other/CMakeLists.txt	\|	16	++++++++++++++++
A	other/Main.c	\|	20	++++++++++++++++++++
A	other/readme.txt	\|	32	++++++++++++++++++++++++++++++++
A	source/.DS_Store	\|	0
A	source/big-int-test.c	\|	1080	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/big-int-test.h	\|	127	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/lib-gpu-verify.c	\|	589	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/opencl-test.c	\|	211	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/opencl-test.h	\|	25	+++++++++++++++++++++++++
A	source/rsa-test.c	\|	276	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	source/rsa-test.h	\|	27	+++++++++++++++++++++++++++
A	xcode/.DS_Store	\|	0
A	xcode/ReadMe.txt	\|	2	++
A	xcode/lib-gpu-verify.xcodeproj/project.pbxproj	\|	328	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/contents.xcworkspacedata	\|	7	+++++++
A	xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist	\|	8	++++++++
A	xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate	\|	0
A	xcode/lib-gpu-verify.xcodeproj/xcshareddata/xcschemes/lib-gpu-verify.xcscheme	\|	79	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist	\|	684	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcschemes/xcschememanagement.plist	\|	22	++++++++++++++++++++++
A	xcode/modexp.cl	\|	5	+++++
A	xcode/rsa-kernel.cl	\|	9	+++++++++