libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

universal.c (17523B)


      1 /*
      2  * universal.c
      3  * This file is part of lib-gpu-verify.
      4  *
      5  * lib-gpu-verify is free software: you can redistribute it and/or modify
      6  * it under the terms of the GNU General Public License as published by
      7  * the Free Software Foundation, either version 3 of the License, or
      8  * (at your option) any later version.
      9  *
     10  * lib-gpu-verify is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13  * GNU General Public License for more details.
     14  *
     15  * Created by Cedric Zwahlen
     16  *
     17  */
     18 #include <stdio.h>
     19 #include "util.h"
     20 #include <math.h>
     21 
     22 struct gpuv_info * gpuv_init(enum GPUV_VARIANT variant, enum GPUV_BIT_LENGTH bit_length) {
     23     
     24     struct gpuv_info *info = malloc(sizeof(struct gpuv_info));
     25     memset(info, 0, sizeof(struct gpuv_info));
     26     
     27     info->variant = variant;
     28     
     29     info->platform = select_platform(0, false);
     30     info->device_id = select_device (info->platform);
     31     info->context = create_compute_context (info->device_id);
     32     
     33     switch (variant) {
     34         case GPUV_GPU_MONTGOMERY:
     35             info->program = compile_program (info->device_id, info->context, "gpuv-montg.cl");
     36             info->kernel = create_kernel (info->program, "mont");
     37             break;
     38         case GPUV_GPU_REGULAR:
     39             info->program = compile_program (info->device_id, info->context, "gpuv.cl");
     40             info->kernel = create_kernel (info->program, "several");
     41             break;
     42             
     43         case GPUV_CPU:
     44             
     45             break;
     46             
     47         default:
     48             return NULL;
     49             break;
     50     }
     51     
     52     return info;
     53     
     54 }
     55 
     56 struct gpuv_batch * gpuv_prepare_batch(void) {
     57     struct gpuv_batch * batch = malloc(sizeof(struct gpuv_batch));
     58     memset(batch, 0, sizeof(struct gpuv_batch));
     59     
     60     struct gpuv_signature_message * ms = malloc(sizeof(struct gpuv_signature_message) * 64);
     61     memset(ms, 0, sizeof(struct gpuv_signature_message) * 64);
     62     
     63     uint32_t * pk_indices = malloc(sizeof(uint32_t) * 64);
     64     memset(pk_indices, 0, sizeof(uint32_t) * 64);
     65     
     66     uint32_t * pk_list = malloc(sizeof(uint32_t) * 64);
     67     memset(pk_list, 0, sizeof(uint32_t) * 64);
     68     
     69     batch->pairs = ms;
     70     batch->max_count = 64;
     71     
     72     batch->pk_indices = pk_indices;
     73     
     74     batch->pk_max_count = 16;
     75     batch->pk_list = pk_list;
     76     
     77     return batch;
     78 }
     79 
     80 struct gpuv_public_key * gpuv_prepare_pubkey(unsigned long e, unsigned long len_n, void *n) {
     81     struct gpuv_public_key * pk = malloc(sizeof(struct gpuv_public_key));
     82     memset(pk, 0, sizeof(struct gpuv_public_key));
     83     
     84     pk->e = e;
     85     pk->n = (char *)n;
     86     pk->len_n = len_n;
     87     
     88     return pk;
     89 }
     90 
     91 struct gpuv_signature_message * gpuv_prepare_sig_msg(struct gpuv_public_key *pubkey) {
     92     struct gpuv_signature_message * sigmsg = malloc(sizeof(struct gpuv_signature_message));
     93     memset(sigmsg, 0, sizeof(struct gpuv_signature_message));
     94     
     95     sigmsg->pubkey = pubkey;
     96     
     97     return sigmsg;
     98 }
     99 
    100 void gpuv_add_signature(struct gpuv_signature_message * sig_msg, unsigned long len, void *s) {
    101     
    102     sig_msg->len_s = len;
    103     sig_msg->s = (char *)s;
    104     
    105 }
    106 
    107 void gpuv_add_message(struct gpuv_signature_message * sig_msg, unsigned long len, void *m) {
    108     
    109     sig_msg->len_m = len;
    110     sig_msg->m = (char *)m;
    111     
    112 }
    113 
    114 
    115 /*
    116  return 1 on error
    117  */
    118 int gpuv_add_to_batch(struct gpuv_batch * batch, struct gpuv_signature_message * sigmem) {
    119     
    120     if (batch->current >= batch->max_count) {
    121         unsigned long pl = batch->max_count * 2;
    122         struct gpuv_signature_message * p = realloc(batch->pairs, pl * sizeof(struct gpuv_signature_message));
    123         uint32_t * p_list = realloc(batch->pk_list, pl * sizeof(uint32_t));
    124         if (p == NULL || p_list == NULL) { return 1; }
    125         memset(&p[batch->max_count], 0, batch->max_count * sizeof(struct gpuv_signature_message));
    126         memset(&p_list[batch->max_count], 0, batch->max_count * sizeof(uint32_t));
    127         batch->pairs = p;
    128         batch->pk_list = p_list;
    129         batch->max_count = pl;
    130     }
    131     
    132     
    133     
    134     // create a map of public keys
    135     
    136     if (batch->pk_current >= batch->pk_max_count) {
    137         unsigned long pl = batch->pk_max_count * 2;
    138         uint32_t * p = realloc(batch->pk_indices, pl * sizeof(uint32_t));
    139         memset(&p[batch->pk_max_count], 0, batch->pk_max_count * sizeof(uint32_t));
    140         if (p == NULL) { return 1; }
    141         batch->pk_indices = p;
    142         batch->pk_max_count = pl;
    143     }
    144     
    145     
    146     int pk_index = 0;
    147     int found = 0;
    148     for (int i = 0; i <= batch->pk_current; i++) {
    149         if ( sigmem->pubkey == batch->pairs[ batch->pk_indices[i] ].pubkey ) {
    150             found = 1;
    151             break;
    152         }
    153         pk_index++;
    154     }
    155     
    156     if (found) {
    157         
    158         batch->pk_list[batch->current] = pk_index; // this says which public key from pk_pairs is referenced by which signature
    159         
    160     } else {
    161         
    162         
    163         batch->pk_indices[batch->pk_current] = (uint32_t)batch->current;
    164         
    165         batch->pk_list[batch->current] = (uint32_t)batch->pk_current;
    166         batch->pk_current++;
    167        
    168     }
    169     
    170     batch->pairs[batch->current] = *sigmem;
    171     batch->current++;
    172     
    173     return 0;
    174 }
    175 
    176 void gpuv_free_batch(struct gpuv_batch * batch) {
    177     
    178     for(int i = 0; i < batch->pk_current; i++) {
    179         
    180         free(batch->pairs[ batch->pk_indices[i] ].pubkey->ni);
    181         free(batch->pairs[ batch->pk_indices[i] ].pubkey->r_1);
    182     
    183         free(batch->pairs[ batch->pk_indices[i] ].pubkey);
    184         
    185     }
    186     
    187     for(int i = 0; i < batch->current; i++) {
    188         
    189         free(batch->pairs[i].M);
    190         free(batch->pairs[i].x);
    191         
    192     }
    193     
    194     free(batch->pairs);
    195     free(batch->pk_indices);
    196     free(batch->pk_list);
    197     
    198     free(batch);
    199     
    200 }
    201 
    202 void gpuv_free_state(struct gpuv_state * state) {
    203     
    204     free(state->results);
    205     
    206     free(state);
    207     
    208 }
    209 
    210 
    211 ///
    212 /// prepares a state object that contains buffers etc for the gpu
    213 struct gpuv_state * gpuv_prepare(struct gpuv_info *info, struct gpuv_batch * batch) {
    214     
    215     struct timespec p1, p2;
    216     
    217     clock_gettime(CLOCK_REALTIME, &p1);
    218     
    219     struct gpuv_state *state = malloc(sizeof(struct gpuv_state));
    220     memset(state, 0, sizeof(struct gpuv_state));
    221     
    222     state->info = info;
    223     state->queue = create_command_queue (info->device_id, info->context);
    224     
    225 //    state->event_kernel = malloc(sizeof(cl_event));
    226 //    state->event_results = malloc(sizeof(cl_event));
    227     
    228     unsigned long sig_pairs = batch->current;
    229     
    230     state->sig_count = sig_pairs;
    231     state->pubkey_count = batch->pk_current;
    232     
    233     unsigned long res_len = ceil((double)sig_pairs / (double)(sizeof(uint32_t) * 8)); // how many uint32
    234     unsigned long res_len_bytes = res_len * sizeof(uint32_t); // how many bytes needed
    235     
    236     uint32_t *results_buf = malloc(res_len_bytes);
    237     memset(results_buf, 0, res_len_bytes);
    238     
    239     // set up the results, which can be accessed by the user once the kernel has run
    240     state->results = results_buf;
    241     state->results_len = res_len;
    242     
    243     unsigned long len = (GPUV_BIT_LENGTH_2048 / 8) * state->sig_count;
    244     unsigned long pk_len = (GPUV_BIT_LENGTH_2048 / 8) * state->pubkey_count;
    245     
    246     int err = 0;
    247     
    248     unsigned long off_x = 0;
    249     unsigned long off_M = 0;
    250     unsigned long off_n = 0;
    251     unsigned long off_ni = 0;
    252     unsigned long off_e = 0;
    253     unsigned long off_m = 0;
    254     unsigned long off_s = 0;
    255     
    256     switch (info->variant) {
    257         case GPUV_GPU_MONTGOMERY:
    258             
    259             state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, res_len_bytes ,NULL, NULL);
    260             state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
    261             state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
    262             state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, pk_len, NULL, NULL);
    263             state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, pk_len, NULL, NULL);
    264             state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, batch->pk_current * sizeof(unsigned long), NULL, NULL);
    265             state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL);
    266             state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(uint32_t) * batch->current, NULL, NULL);
    267             
    268             if (!state->res_mem || !state->msg_mem || !state->pks_indices ||
    269                 !state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem || !state->exp_mem)
    270             {
    271                 printf("Error: Failed to allocate device memory!\n");
    272                 exit(1);
    273             }
    274             
    275             int l = sizeof(gpu_register);
    276             
    277             
    278             
    279             for (int j = 0; j < batch->pk_current; j++) {
    280                 
    281                 struct gpuv_public_key * p = batch->pairs[ batch->pk_indices[j] ].pubkey;
    282                 
    283                 pk_to_mont(p);
    284                 
    285                 err |= clEnqueueWriteBuffer(state->queue, state->n_mem, CL_TRUE, off_n, p->len_n       * l, p->n, 0, NULL, NULL);
    286                 err |= clEnqueueWriteBuffer(state->queue, state->ni_mem, CL_TRUE, off_ni, p->len_ni    * l, p->ni, 0, NULL, NULL);
    287                 err |= clEnqueueWriteBuffer(state->queue, state->exp_mem, CL_TRUE, off_e, sizeof(unsigned long), &p->e, 0, NULL, NULL);
    288                 
    289                 off_n += p->len_n * l;
    290                 off_ni += p->len_ni * l;
    291                 off_e += sizeof(unsigned long);
    292             }
    293             
    294             for (int i = 0; i < batch->current; i++) {
    295                 
    296                 struct gpuv_signature_message * s = &batch->pairs[i];
    297                 
    298                 sig_msg_to_mont(s);
    299                 
    300                 err |= clEnqueueWriteBuffer(state->queue, state->x_mem, CL_TRUE, off_x, s->len_x   * l, s->x, 0, NULL, NULL);
    301                 err |= clEnqueueWriteBuffer(state->queue, state->m_mem, CL_TRUE, off_M, s->len_M   * l, s->M, 0, NULL, NULL);
    302                 err |= clEnqueueWriteBuffer(state->queue, state->msg_mem, CL_TRUE, off_m, s->len_m  * l, s->m, 0, NULL, NULL);
    303                 // the kernel wants to know how many elements are in the array, so we write it at the beginning
    304                 
    305                 off_x += s->len_x * l;
    306                 off_M += s->len_M * l;
    307                 off_m += s->len_m * l;
    308                 
    309             }
    310             
    311             err |= clEnqueueWriteBuffer(state->queue, state->pks_indices, CL_TRUE, 0, sizeof(uint32_t) * batch->current, batch->pk_list, 0, NULL, NULL);
    312             err |= clEnqueueWriteBuffer(state->queue, state->res_mem, CL_TRUE, 0, res_len_bytes, state->results, 0, NULL, NULL);
    313             
    314             if (err != CL_SUCCESS)
    315             {
    316                 printf("Error: Failed to write to source array!\n");
    317                 exit(1);
    318             }
    319             
    320             break;
    321             
    322          
    323         case GPUV_GPU_REGULAR:
    324             
    325             
    326             state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, res_len_bytes ,NULL, NULL);
    327             state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, pk_len, NULL, NULL);
    328             state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sig_pairs * sizeof(DIGIT_T), NULL, NULL);
    329             state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len, NULL, NULL);
    330             state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); // the base, to compare whether we get the same signature
    331 
    332             state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(uint32_t) * batch->current, NULL, NULL);
    333             
    334             
    335             if (!state->sig_mem || !state->exp_mem || !state->n_mem || !state->msg_mem || !state->pks_indices)
    336             {
    337                 printf("Error: Failed to allocate device memory!\n");
    338                 exit(1);
    339             }
    340             
    341             int lr = sizeof(DIGIT_T);
    342             
    343             for (int j = 0; j < batch->pk_current; j++) {
    344                 
    345                 struct gpuv_public_key * p = batch->pairs[ batch->pk_indices[j] ].pubkey;
    346                 
    347                 err |= clEnqueueWriteBuffer(state->queue, state->n_mem, CL_TRUE, off_n, p->len_n * lr, p->n, 0, NULL, NULL);
    348                 err |= clEnqueueWriteBuffer(state->queue, state->exp_mem, CL_TRUE, off_e, sizeof(DIGIT_T), &p->e, 0, NULL, NULL);
    349                 
    350                 off_n += p->len_n * lr;
    351                 off_e += sizeof(DIGIT_T);
    352                 
    353             }
    354             
    355             
    356             for (int i = 0; i < batch->current; i++) {
    357                 
    358                 struct gpuv_signature_message * s = &batch->pairs[i];
    359                 
    360                 err |= clEnqueueWriteBuffer(state->queue, state->msg_mem, CL_TRUE, off_m, s->len_m * lr, s->m, 0, NULL, NULL);
    361                 err |= clEnqueueWriteBuffer(state->queue, state->sig_mem, CL_TRUE, off_s, s->len_s * lr, s->s, 0, NULL, NULL);
    362                
    363                 off_m += s->len_m * lr;
    364                 off_s += s->len_s * lr;
    365                 
    366             }
    367             
    368             err |= clEnqueueWriteBuffer(state->queue, state->pks_indices, CL_TRUE, 0, sizeof(uint32_t) * batch->current, batch->pk_list, 0, NULL, NULL);
    369             err |= clEnqueueWriteBuffer(state->queue, state->res_mem, CL_TRUE, 0, res_len_bytes, state->results, 0, NULL, NULL);
    370             
    371             if (err != CL_SUCCESS)
    372             {
    373                 printf("Error: Failed to write to source array!\n");
    374                 exit(1);
    375             }
    376             
    377             break;
    378             
    379         default:
    380             break;
    381     }
    382     
    383     
    384     clock_gettime(CLOCK_REALTIME, &p2);
    385     
    386     state->p.tv_sec = ( p2.tv_nsec < p1.tv_nsec ? p2.tv_sec - (p1.tv_sec + 1) : p2.tv_sec - p1.tv_sec );
    387     state->p.tv_nsec = ( p2.tv_nsec < p1.tv_nsec ? ((999999999 - p1.tv_nsec) + p2.tv_nsec) : (p2.tv_nsec - p1.tv_nsec) ) / 1000;
    388     
    389     state->ready = 1;
    390     
    391     return state;
    392     
    393 }
    394 
    395 
    396 /*
    397  Starts processing the state batch.
    398  
    399  cls i called once the kernel has finished, and results are available.
    400  
    401  return 0 for success, 1 for failure
    402  
    403  Returns immediately
    404  */
    405 int gpuv_start(struct gpuv_state *state, void (*cls)(void *, int, struct timespec, unsigned long, uint32_t *), void * arg, struct gpuv_batch *batch) {
    406     
    407     if (state->stale) {
    408         printf("State submitted twice.\n");
    409         return 1;
    410     }
    411     
    412     if (state->info->in_progress) {
    413         printf("GPU is busy.\n");
    414         return 1;
    415     }
    416     
    417     if (!state->ready) {
    418         printf("State not fully initialised.\n");
    419         return 1;
    420     }
    421     
    422     state->info->in_progress = 1;
    423     state->cls = cls;
    424     state->arg = arg;
    425     
    426     // Set the arguments to our compute kernel
    427     //
    428     int err = 0;
    429     
    430     switch (state->info->variant) {
    431         case GPUV_GPU_MONTGOMERY:
    432             
    433             err |= clSetKernelArg(state->info->kernel, 0, sizeof(cl_mem), &state->x_mem);
    434             err |= clSetKernelArg(state->info->kernel, 1, sizeof(cl_mem), &state->m_mem);
    435             err |= clSetKernelArg(state->info->kernel, 2, sizeof(cl_mem), &state->n_mem);
    436             err |= clSetKernelArg(state->info->kernel, 3, sizeof(cl_mem), &state->ni_mem);
    437             err |= clSetKernelArg(state->info->kernel, 4, sizeof(cl_mem), &state->exp_mem);
    438             err |= clSetKernelArg(state->info->kernel, 5, sizeof(cl_mem), &state->msg_mem);
    439             err |= clSetKernelArg(state->info->kernel, 6, sizeof(cl_mem), &state->pks_indices);
    440             err |= clSetKernelArg(state->info->kernel, 7, sizeof(cl_mem), &state->res_mem);
    441             
    442             break;
    443             
    444         case GPUV_GPU_REGULAR:
    445             
    446             err |= clSetKernelArg(state->info->kernel, 0, sizeof(cl_mem), &state->sig_mem);
    447             err |= clSetKernelArg(state->info->kernel, 1, sizeof(cl_mem), &state->exp_mem);
    448             err |= clSetKernelArg(state->info->kernel, 2, sizeof(cl_mem), &state->n_mem);
    449             err |= clSetKernelArg(state->info->kernel, 3, sizeof(cl_mem), &state->msg_mem);
    450             err |= clSetKernelArg(state->info->kernel, 4, sizeof(cl_mem), &state->res_mem);
    451             err |= clSetKernelArg(state->info->kernel, 5, sizeof(cl_mem), &state->pks_indices);
    452             
    453 
    454             break;
    455             
    456         case GPUV_CPU:
    457             
    458            
    459             cpu_verify(batch, state);
    460             
    461             return 0;
    462             
    463             break;
    464             
    465         default:
    466             break;
    467     }
    468     
    469     
    470 
    471     if (err != CL_SUCCESS)
    472     {
    473         printf("Error: Failed to set kernel arguments! %d\n", err);
    474         exit(1);
    475     }
    476     
    477     clock_gettime(CLOCK_REALTIME, &state->t);
    478     
    479     
    480     cl_event e = clCreateUserEvent(state->info->context, NULL);
    481     
    482     size_t local = 1;
    483     
    484     err = clEnqueueNDRangeKernel(state->queue, state->info->kernel, 1, NULL, &state->sig_count, &local, 0, NULL, &e);
    485     if (err)
    486     {
    487         printf("Error: Failed to execute kernel!\n");
    488         return 1;
    489     }
    490     
    491     clSetEventCallback(e, CL_COMPLETE, callback_kernel, state);
    492     clRetainEvent(e);
    493     
    494     return 0;
    495 }
    496 
    497 
    498 
    499 
    500 /*
    501  releases kernel resources – don't use info after this call
    502  */
    503 void gpuv_finish(struct gpuv_info * info) {
    504     
    505     clReleaseProgram(info->program);
    506     clReleaseKernel(info->kernel);
    507     clReleaseContext(info->context);
    508     
    509     free(info);
    510     
    511 }
    512 
    513