universal.c (17523B)
1 /* 2 * universal.c 3 * This file is part of lib-gpu-verify. 4 * 5 * lib-gpu-verify is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * lib-gpu-verify is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * Created by Cedric Zwahlen 16 * 17 */ 18 #include <stdio.h> 19 #include "util.h" 20 #include <math.h> 21 22 struct gpuv_info * gpuv_init(enum GPUV_VARIANT variant, enum GPUV_BIT_LENGTH bit_length) { 23 24 struct gpuv_info *info = malloc(sizeof(struct gpuv_info)); 25 memset(info, 0, sizeof(struct gpuv_info)); 26 27 info->variant = variant; 28 29 info->platform = select_platform(0, false); 30 info->device_id = select_device (info->platform); 31 info->context = create_compute_context (info->device_id); 32 33 switch (variant) { 34 case GPUV_GPU_MONTGOMERY: 35 info->program = compile_program (info->device_id, info->context, "gpuv-montg.cl"); 36 info->kernel = create_kernel (info->program, "mont"); 37 break; 38 case GPUV_GPU_REGULAR: 39 info->program = compile_program (info->device_id, info->context, "gpuv.cl"); 40 info->kernel = create_kernel (info->program, "several"); 41 break; 42 43 case GPUV_CPU: 44 45 break; 46 47 default: 48 return NULL; 49 break; 50 } 51 52 return info; 53 54 } 55 56 struct gpuv_batch * gpuv_prepare_batch(void) { 57 struct gpuv_batch * batch = malloc(sizeof(struct gpuv_batch)); 58 memset(batch, 0, sizeof(struct gpuv_batch)); 59 60 struct gpuv_signature_message * ms = malloc(sizeof(struct gpuv_signature_message) * 64); 61 memset(ms, 0, sizeof(struct gpuv_signature_message) * 64); 62 63 uint32_t * pk_indices = malloc(sizeof(uint32_t) * 64); 64 memset(pk_indices, 0, sizeof(uint32_t) * 64); 65 66 uint32_t * pk_list = malloc(sizeof(uint32_t) * 64); 67 memset(pk_list, 0, sizeof(uint32_t) * 64); 68 69 batch->pairs = ms; 70 batch->max_count = 64; 71 72 batch->pk_indices = pk_indices; 73 74 batch->pk_max_count = 16; 75 batch->pk_list = pk_list; 76 77 return batch; 78 } 79 80 struct gpuv_public_key * gpuv_prepare_pubkey(unsigned long e, unsigned long len_n, void *n) { 81 struct gpuv_public_key * pk = malloc(sizeof(struct gpuv_public_key)); 82 memset(pk, 0, sizeof(struct gpuv_public_key)); 83 84 pk->e = e; 85 pk->n = (char *)n; 86 pk->len_n = len_n; 87 88 return pk; 89 } 90 91 struct gpuv_signature_message * gpuv_prepare_sig_msg(struct gpuv_public_key *pubkey) { 92 struct gpuv_signature_message * sigmsg = malloc(sizeof(struct gpuv_signature_message)); 93 memset(sigmsg, 0, sizeof(struct gpuv_signature_message)); 94 95 sigmsg->pubkey = pubkey; 96 97 return sigmsg; 98 } 99 100 void gpuv_add_signature(struct gpuv_signature_message * sig_msg, unsigned long len, void *s) { 101 102 sig_msg->len_s = len; 103 sig_msg->s = (char *)s; 104 105 } 106 107 void gpuv_add_message(struct gpuv_signature_message * sig_msg, unsigned long len, void *m) { 108 109 sig_msg->len_m = len; 110 sig_msg->m = (char *)m; 111 112 } 113 114 115 /* 116 return 1 on error 117 */ 118 int gpuv_add_to_batch(struct gpuv_batch * batch, struct gpuv_signature_message * sigmem) { 119 120 if (batch->current >= batch->max_count) { 121 unsigned long pl = batch->max_count * 2; 122 struct gpuv_signature_message * p = realloc(batch->pairs, pl * sizeof(struct gpuv_signature_message)); 123 uint32_t * p_list = realloc(batch->pk_list, pl * sizeof(uint32_t)); 124 if (p == NULL || p_list == NULL) { return 1; } 125 memset(&p[batch->max_count], 0, batch->max_count * sizeof(struct gpuv_signature_message)); 126 memset(&p_list[batch->max_count], 0, batch->max_count * sizeof(uint32_t)); 127 batch->pairs = p; 128 batch->pk_list = p_list; 129 batch->max_count = pl; 130 } 131 132 133 134 // create a map of public keys 135 136 if (batch->pk_current >= batch->pk_max_count) { 137 unsigned long pl = batch->pk_max_count * 2; 138 uint32_t * p = realloc(batch->pk_indices, pl * sizeof(uint32_t)); 139 memset(&p[batch->pk_max_count], 0, batch->pk_max_count * sizeof(uint32_t)); 140 if (p == NULL) { return 1; } 141 batch->pk_indices = p; 142 batch->pk_max_count = pl; 143 } 144 145 146 int pk_index = 0; 147 int found = 0; 148 for (int i = 0; i <= batch->pk_current; i++) { 149 if ( sigmem->pubkey == batch->pairs[ batch->pk_indices[i] ].pubkey ) { 150 found = 1; 151 break; 152 } 153 pk_index++; 154 } 155 156 if (found) { 157 158 batch->pk_list[batch->current] = pk_index; // this says which public key from pk_pairs is referenced by which signature 159 160 } else { 161 162 163 batch->pk_indices[batch->pk_current] = (uint32_t)batch->current; 164 165 batch->pk_list[batch->current] = (uint32_t)batch->pk_current; 166 batch->pk_current++; 167 168 } 169 170 batch->pairs[batch->current] = *sigmem; 171 batch->current++; 172 173 return 0; 174 } 175 176 void gpuv_free_batch(struct gpuv_batch * batch) { 177 178 for(int i = 0; i < batch->pk_current; i++) { 179 180 free(batch->pairs[ batch->pk_indices[i] ].pubkey->ni); 181 free(batch->pairs[ batch->pk_indices[i] ].pubkey->r_1); 182 183 free(batch->pairs[ batch->pk_indices[i] ].pubkey); 184 185 } 186 187 for(int i = 0; i < batch->current; i++) { 188 189 free(batch->pairs[i].M); 190 free(batch->pairs[i].x); 191 192 } 193 194 free(batch->pairs); 195 free(batch->pk_indices); 196 free(batch->pk_list); 197 198 free(batch); 199 200 } 201 202 void gpuv_free_state(struct gpuv_state * state) { 203 204 free(state->results); 205 206 free(state); 207 208 } 209 210 211 /// 212 /// prepares a state object that contains buffers etc for the gpu 213 struct gpuv_state * gpuv_prepare(struct gpuv_info *info, struct gpuv_batch * batch) { 214 215 struct timespec p1, p2; 216 217 clock_gettime(CLOCK_REALTIME, &p1); 218 219 struct gpuv_state *state = malloc(sizeof(struct gpuv_state)); 220 memset(state, 0, sizeof(struct gpuv_state)); 221 222 state->info = info; 223 state->queue = create_command_queue (info->device_id, info->context); 224 225 // state->event_kernel = malloc(sizeof(cl_event)); 226 // state->event_results = malloc(sizeof(cl_event)); 227 228 unsigned long sig_pairs = batch->current; 229 230 state->sig_count = sig_pairs; 231 state->pubkey_count = batch->pk_current; 232 233 unsigned long res_len = ceil((double)sig_pairs / (double)(sizeof(uint32_t) * 8)); // how many uint32 234 unsigned long res_len_bytes = res_len * sizeof(uint32_t); // how many bytes needed 235 236 uint32_t *results_buf = malloc(res_len_bytes); 237 memset(results_buf, 0, res_len_bytes); 238 239 // set up the results, which can be accessed by the user once the kernel has run 240 state->results = results_buf; 241 state->results_len = res_len; 242 243 unsigned long len = (GPUV_BIT_LENGTH_2048 / 8) * state->sig_count; 244 unsigned long pk_len = (GPUV_BIT_LENGTH_2048 / 8) * state->pubkey_count; 245 246 int err = 0; 247 248 unsigned long off_x = 0; 249 unsigned long off_M = 0; 250 unsigned long off_n = 0; 251 unsigned long off_ni = 0; 252 unsigned long off_e = 0; 253 unsigned long off_m = 0; 254 unsigned long off_s = 0; 255 256 switch (info->variant) { 257 case GPUV_GPU_MONTGOMERY: 258 259 state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, res_len_bytes ,NULL, NULL); 260 state->x_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); 261 state->m_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); 262 state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, pk_len, NULL, NULL); 263 state->ni_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, pk_len, NULL, NULL); 264 state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, batch->pk_current * sizeof(unsigned long), NULL, NULL); 265 state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); 266 state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(uint32_t) * batch->current, NULL, NULL); 267 268 if (!state->res_mem || !state->msg_mem || !state->pks_indices || 269 !state->x_mem || !state->m_mem || !state->n_mem || !state->ni_mem || !state->exp_mem) 270 { 271 printf("Error: Failed to allocate device memory!\n"); 272 exit(1); 273 } 274 275 int l = sizeof(gpu_register); 276 277 278 279 for (int j = 0; j < batch->pk_current; j++) { 280 281 struct gpuv_public_key * p = batch->pairs[ batch->pk_indices[j] ].pubkey; 282 283 pk_to_mont(p); 284 285 err |= clEnqueueWriteBuffer(state->queue, state->n_mem, CL_TRUE, off_n, p->len_n * l, p->n, 0, NULL, NULL); 286 err |= clEnqueueWriteBuffer(state->queue, state->ni_mem, CL_TRUE, off_ni, p->len_ni * l, p->ni, 0, NULL, NULL); 287 err |= clEnqueueWriteBuffer(state->queue, state->exp_mem, CL_TRUE, off_e, sizeof(unsigned long), &p->e, 0, NULL, NULL); 288 289 off_n += p->len_n * l; 290 off_ni += p->len_ni * l; 291 off_e += sizeof(unsigned long); 292 } 293 294 for (int i = 0; i < batch->current; i++) { 295 296 struct gpuv_signature_message * s = &batch->pairs[i]; 297 298 sig_msg_to_mont(s); 299 300 err |= clEnqueueWriteBuffer(state->queue, state->x_mem, CL_TRUE, off_x, s->len_x * l, s->x, 0, NULL, NULL); 301 err |= clEnqueueWriteBuffer(state->queue, state->m_mem, CL_TRUE, off_M, s->len_M * l, s->M, 0, NULL, NULL); 302 err |= clEnqueueWriteBuffer(state->queue, state->msg_mem, CL_TRUE, off_m, s->len_m * l, s->m, 0, NULL, NULL); 303 // the kernel wants to know how many elements are in the array, so we write it at the beginning 304 305 off_x += s->len_x * l; 306 off_M += s->len_M * l; 307 off_m += s->len_m * l; 308 309 } 310 311 err |= clEnqueueWriteBuffer(state->queue, state->pks_indices, CL_TRUE, 0, sizeof(uint32_t) * batch->current, batch->pk_list, 0, NULL, NULL); 312 err |= clEnqueueWriteBuffer(state->queue, state->res_mem, CL_TRUE, 0, res_len_bytes, state->results, 0, NULL, NULL); 313 314 if (err != CL_SUCCESS) 315 { 316 printf("Error: Failed to write to source array!\n"); 317 exit(1); 318 } 319 320 break; 321 322 323 case GPUV_GPU_REGULAR: 324 325 326 state->res_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, res_len_bytes ,NULL, NULL); 327 state->n_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, pk_len, NULL, NULL); 328 state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, sig_pairs * sizeof(DIGIT_T), NULL, NULL); 329 state->sig_mem = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len, NULL, NULL); 330 state->msg_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, len, NULL, NULL); // the base, to compare whether we get the same signature 331 332 state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(uint32_t) * batch->current, NULL, NULL); 333 334 335 if (!state->sig_mem || !state->exp_mem || !state->n_mem || !state->msg_mem || !state->pks_indices) 336 { 337 printf("Error: Failed to allocate device memory!\n"); 338 exit(1); 339 } 340 341 int lr = sizeof(DIGIT_T); 342 343 for (int j = 0; j < batch->pk_current; j++) { 344 345 struct gpuv_public_key * p = batch->pairs[ batch->pk_indices[j] ].pubkey; 346 347 err |= clEnqueueWriteBuffer(state->queue, state->n_mem, CL_TRUE, off_n, p->len_n * lr, p->n, 0, NULL, NULL); 348 err |= clEnqueueWriteBuffer(state->queue, state->exp_mem, CL_TRUE, off_e, sizeof(DIGIT_T), &p->e, 0, NULL, NULL); 349 350 off_n += p->len_n * lr; 351 off_e += sizeof(DIGIT_T); 352 353 } 354 355 356 for (int i = 0; i < batch->current; i++) { 357 358 struct gpuv_signature_message * s = &batch->pairs[i]; 359 360 err |= clEnqueueWriteBuffer(state->queue, state->msg_mem, CL_TRUE, off_m, s->len_m * lr, s->m, 0, NULL, NULL); 361 err |= clEnqueueWriteBuffer(state->queue, state->sig_mem, CL_TRUE, off_s, s->len_s * lr, s->s, 0, NULL, NULL); 362 363 off_m += s->len_m * lr; 364 off_s += s->len_s * lr; 365 366 } 367 368 err |= clEnqueueWriteBuffer(state->queue, state->pks_indices, CL_TRUE, 0, sizeof(uint32_t) * batch->current, batch->pk_list, 0, NULL, NULL); 369 err |= clEnqueueWriteBuffer(state->queue, state->res_mem, CL_TRUE, 0, res_len_bytes, state->results, 0, NULL, NULL); 370 371 if (err != CL_SUCCESS) 372 { 373 printf("Error: Failed to write to source array!\n"); 374 exit(1); 375 } 376 377 break; 378 379 default: 380 break; 381 } 382 383 384 clock_gettime(CLOCK_REALTIME, &p2); 385 386 state->p.tv_sec = ( p2.tv_nsec < p1.tv_nsec ? p2.tv_sec - (p1.tv_sec + 1) : p2.tv_sec - p1.tv_sec ); 387 state->p.tv_nsec = ( p2.tv_nsec < p1.tv_nsec ? ((999999999 - p1.tv_nsec) + p2.tv_nsec) : (p2.tv_nsec - p1.tv_nsec) ) / 1000; 388 389 state->ready = 1; 390 391 return state; 392 393 } 394 395 396 /* 397 Starts processing the state batch. 398 399 cls i called once the kernel has finished, and results are available. 400 401 return 0 for success, 1 for failure 402 403 Returns immediately 404 */ 405 int gpuv_start(struct gpuv_state *state, void (*cls)(void *, int, struct timespec, unsigned long, uint32_t *), void * arg, struct gpuv_batch *batch) { 406 407 if (state->stale) { 408 printf("State submitted twice.\n"); 409 return 1; 410 } 411 412 if (state->info->in_progress) { 413 printf("GPU is busy.\n"); 414 return 1; 415 } 416 417 if (!state->ready) { 418 printf("State not fully initialised.\n"); 419 return 1; 420 } 421 422 state->info->in_progress = 1; 423 state->cls = cls; 424 state->arg = arg; 425 426 // Set the arguments to our compute kernel 427 // 428 int err = 0; 429 430 switch (state->info->variant) { 431 case GPUV_GPU_MONTGOMERY: 432 433 err |= clSetKernelArg(state->info->kernel, 0, sizeof(cl_mem), &state->x_mem); 434 err |= clSetKernelArg(state->info->kernel, 1, sizeof(cl_mem), &state->m_mem); 435 err |= clSetKernelArg(state->info->kernel, 2, sizeof(cl_mem), &state->n_mem); 436 err |= clSetKernelArg(state->info->kernel, 3, sizeof(cl_mem), &state->ni_mem); 437 err |= clSetKernelArg(state->info->kernel, 4, sizeof(cl_mem), &state->exp_mem); 438 err |= clSetKernelArg(state->info->kernel, 5, sizeof(cl_mem), &state->msg_mem); 439 err |= clSetKernelArg(state->info->kernel, 6, sizeof(cl_mem), &state->pks_indices); 440 err |= clSetKernelArg(state->info->kernel, 7, sizeof(cl_mem), &state->res_mem); 441 442 break; 443 444 case GPUV_GPU_REGULAR: 445 446 err |= clSetKernelArg(state->info->kernel, 0, sizeof(cl_mem), &state->sig_mem); 447 err |= clSetKernelArg(state->info->kernel, 1, sizeof(cl_mem), &state->exp_mem); 448 err |= clSetKernelArg(state->info->kernel, 2, sizeof(cl_mem), &state->n_mem); 449 err |= clSetKernelArg(state->info->kernel, 3, sizeof(cl_mem), &state->msg_mem); 450 err |= clSetKernelArg(state->info->kernel, 4, sizeof(cl_mem), &state->res_mem); 451 err |= clSetKernelArg(state->info->kernel, 5, sizeof(cl_mem), &state->pks_indices); 452 453 454 break; 455 456 case GPUV_CPU: 457 458 459 cpu_verify(batch, state); 460 461 return 0; 462 463 break; 464 465 default: 466 break; 467 } 468 469 470 471 if (err != CL_SUCCESS) 472 { 473 printf("Error: Failed to set kernel arguments! %d\n", err); 474 exit(1); 475 } 476 477 clock_gettime(CLOCK_REALTIME, &state->t); 478 479 480 cl_event e = clCreateUserEvent(state->info->context, NULL); 481 482 size_t local = 1; 483 484 err = clEnqueueNDRangeKernel(state->queue, state->info->kernel, 1, NULL, &state->sig_count, &local, 0, NULL, &e); 485 if (err) 486 { 487 printf("Error: Failed to execute kernel!\n"); 488 return 1; 489 } 490 491 clSetEventCallback(e, CL_COMPLETE, callback_kernel, state); 492 clRetainEvent(e); 493 494 return 0; 495 } 496 497 498 499 500 /* 501 releases kernel resources – don't use info after this call 502 */ 503 void gpuv_finish(struct gpuv_info * info) { 504 505 clReleaseProgram(info->program); 506 clReleaseKernel(info->kernel); 507 clReleaseContext(info->context); 508 509 free(info); 510 511 } 512 513