libgpuverify

Signature verification on GPUs (WiP)
Log | Files | Refs | README | LICENSE

commit f38a238420defdd923d4edece13511e7f6842b7f
parent fb111b34e2a4ecce05efbadf1f62fcd4ce752415
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sun, 19 Nov 2023 00:54:50 +0100

proper square example

Diffstat:
Msource/Makefile | 3+++
Asource/square.c | 529+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asource/square.cl | 2++
Mxcode/rsa-kernel.cl | 13++++++++-----
4 files changed, 542 insertions(+), 5 deletions(-)

diff --git a/source/Makefile b/source/Makefile @@ -1,2 +1,5 @@ all: gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o foo rsa-test.c lib-gpu-verify.c big-int-test.c -lgcrypt -lOpenCL -lm + +square: square.c + gcc -g -O0 -D CL_TARGET_OPENCL_VERSION=100 -o square square.c -lOpenCL -lm diff --git a/source/square.c b/source/square.c @@ -0,0 +1,529 @@ +#include <stdio.h> +#include <stdlib.h> +#include <CL/opencl.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <stdbool.h> +#include <unistd.h> + + +static cl_platform_id +select_platform (unsigned int offset, + bool print_platforms) +{ + cl_uint max_platforms = 4; + cl_platform_id platforms[max_platforms]; + cl_uint num_platforms; + cl_int rplat; + + rplat = clGetPlatformIDs (max_platforms, + platforms, + &num_platforms); + if (CL_SUCCESS != rplat) + { + fprintf (stderr, + "Error: Failed to lookup platforms! (%d)\n", + rplat); + exit (1); + } + if (print_platforms) + { + for (unsigned int i = 0; i<num_platforms; i++) + { + char buf[128]; + size_t rbuf; + static struct + { + cl_platform_info cpi; + const char *name; + } param[] = { + { CL_PLATFORM_PROFILE, "profile" }, + { CL_PLATFORM_VENDOR, "vendor" }, + { CL_PLATFORM_NAME, "name" }, + { CL_PLATFORM_EXTENSIONS, "extensions" }, + { 0, NULL } + }; + + for (unsigned int j = 0; NULL != param[j].name; j++) + { + cl_int err; + + err = clGetPlatformInfo (platforms[i], + param[j].cpi, + sizeof (buf), + buf, + &rbuf); + if (err != CL_SUCCESS) + { + fprintf (stderr, + "Error: Failed to get platform info for %s! (%d)\n", + param[j].name, + err); + } + else + { + printf ("#%u %s %.*s\n", + i, + param[j].name, + (int) rbuf, + buf); + } + } + } + exit (0); + } + if (offset >= num_platforms) + { + fprintf (stderr, + "Found only %u platforms\n", + (unsigned int) num_platforms); + exit (1); + } + return platforms[offset]; +} + + +static cl_device_id +select_device (cl_platform_id platform) +{ + cl_device_id device_id; + char buf[1024]; + size_t len; + cl_int err; + cl_uint address_bits = 0; + + err = clGetDeviceIDs (platform, + CL_DEVICE_TYPE_ALL, + 1, /* 1 device */ + &device_id, + NULL); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to find a device! (%d)\n", + err); + exit (1); + } + + err = clGetDeviceInfo (device_id, + CL_DRIVER_VERSION, + sizeof (buf), + buf, + &len); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to get device driver version! (%d)\n", + err); + exit (1); + } + printf ("Driver version: %.*s\n", + (int) len, + buf); + clGetDeviceInfo (device_id, + CL_DEVICE_ADDRESS_BITS, + sizeof (address_bits), + &address_bits, + &len); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to get device address bits! (%d)\n", + err); + exit (1); + } + printf ("device address bits: %d\n", + (int) address_bits); + return device_id; +} + + +static void +logger (const char *errinfo, + const void *private_info, + size_t cb, + void *user_data) +{ + fprintf (stderr, + "<OpenCL>: %s\n", + errinfo); +} + + +static cl_context +create_compute_context (cl_device_id device_id) +{ + cl_int err; + cl_context context; + + context = clCreateContext (NULL, + 1, + &device_id, + &logger, NULL, + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create a compute context (%d)\n", + err); + exit (1); + } + return context; +} + + +static cl_command_queue +create_command_queue (cl_device_id device_id, + cl_context context) +{ + cl_int err; + cl_command_queue commands; + + commands = clCreateCommandQueue (context, + device_id, + 0, /* properties */ + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create a command commands!\n (%d)", + err); + exit (1); + } + return commands; +} + + +static cl_program +compile_program (cl_device_id device_id, + cl_context context, + const char *sourcefile) +{ + cl_program program; + int fd; + void *code; + struct stat ss; + cl_int err; + + fd = open (sourcefile, + O_RDONLY); + if (-1 == fd) + { + fprintf (stderr, + "Failed to open %s: %s\n", + sourcefile, + strerror (errno)); + exit (1); + } + if (0 != fstat (fd, + &ss)) + { + fprintf (stderr, + "Failed to stat %s: %s\n", + sourcefile, + strerror (errno)); + close (fd); + exit (1); + } + code = mmap (NULL, + ss.st_size, + PROT_READ, + MAP_PRIVATE, + fd, + 0 /* offset */); + close (fd); + { + size_t sz = ss.st_size; + + program = clCreateProgramWithSource (context, + 1, /* 1 source file */ + (const char **) &code, + &sz, + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create compute program (%d)!\n", + err); + munmap (code, + ss.st_size); + exit (1); + } + } + err = clBuildProgram (program, + 0, /* number of devices */ + NULL, /* devices */ + NULL, /* options (char *) */ + NULL, /* callback */ + NULL); + munmap (code, + ss.st_size); + if (CL_SUCCESS != err) + { + size_t len; + char buffer[2048]; + + fprintf (stderr, + "Error: Failed to build program executable (%d)!\n", + err); + err = clGetProgramBuildInfo (program, + device_id, + CL_PROGRAM_BUILD_LOG, + sizeof(buffer), + buffer, + &len); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: could not get build logs (%d)!\n", + err); + exit (1); + } + fprintf (stderr, + "<clBuild>: %.*s\n", + (int) len, + buffer); + exit (1); + } + return program; +} + + +static cl_kernel +create_kernel (cl_program program, + const char *name) +{ + cl_kernel kernel; + cl_int err; + + kernel = clCreateKernel (program, + name, + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create compute kernel %s: %d!\n", + name, + err); + exit (1); + } + return kernel; +} + + +static void +square_with_kernel (cl_device_id device_id, + cl_context context, + cl_command_queue commands, + cl_kernel kernel, + unsigned int problem_size, + const float problem[static problem_size], + float solution[static problem_size]) +{ + // Create the input and output arrays in device memory for our calculation + cl_int err; + cl_mem sq_input; + cl_mem sq_output; + size_t local; + + sq_input = clCreateBuffer (context, + CL_MEM_READ_ONLY + | CL_MEM_USE_HOST_PTR, + sizeof(float) * problem_size, + (void *) problem, + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create input buffer: %d!\n", + err); + exit (1); + } + sq_output = clCreateBuffer (context, + CL_MEM_WRITE_ONLY, + sizeof(float) * problem_size, + NULL /* no previous data */, + &err); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to create output buffer: %d!\n", + err); + exit (1); + } +#if DEAD + // Write our data set into the input array in device memory + err = clEnqueueWriteBuffer (commands, + sq_input, + CL_TRUE, + 0, + sizeof(DIGIT_T) * s_len[n - 1], + signatures, + 0, + NULL, + NULL); + if (err != CL_SUCCESS) + { + printf ("Error: Failed to write to source array!\n"); + exit (1); + } +#endif + err = 0; + err = clSetKernelArg (kernel, 0, sizeof(cl_mem), &sq_input); + err |= clSetKernelArg (kernel, 1, sizeof(cl_mem), &sq_output); + err |= clSetKernelArg (kernel, 2, sizeof(problem_size), &problem_size); + if (err != CL_SUCCESS) + { + fprintf (stderr, + "Failed to set kernel arguments! %d\n", + err); + exit (1); + } + + // Get the maximum work group size for executing the kernel on the device + // FIXME: do this once when the kernel is created, not every time we run it! + err = clGetKernelWorkGroupInfo (kernel, + device_id, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(local), + &local, + NULL); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to retrieve kernel work group info! %d\n", err); + exit (1); + } + + // Execute the kernel over the entire range of our 1d input data set + // using the maximum number of work group items for this device + { + size_t global_problem_size = problem_size; + +#if 0 + fprintf (stderr, + "GPS: %llu - LPS: %llu\n", + (unsigned long long) problem_size, + (unsigned long long) local); +#endif + local = (local > problem_size) ? problem_size : local; + err = clEnqueueNDRangeKernel (commands, + kernel, + 1 /* work_dim */, + NULL /* global work offset */, + &global_problem_size, /* array of work_dim values */ + &local, + 0, /* num_events */ + NULL, /* event wait list */ + NULL /* event */); + } + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to execute kernel (%d)!\n", + err); + exit (1); + } + + // Wait for the command commands to get serviced before reading back results + clFinish (commands); + + // Read back the results from the device to verify the output + err = clEnqueueReadBuffer (commands, + sq_output, + CL_TRUE /* blocking read */, + 0 /* offset */, + sizeof (float) * problem_size, + solution, + 0, /* num events in wait list */ + NULL /* wait list */, + NULL /* event */); + if (CL_SUCCESS != err) + { + fprintf (stderr, + "Error: Failed to read output array! %d\n", + err); + exit (1); + } + clReleaseMemObject (sq_output); + clReleaseMemObject (sq_input); +} + + +int +main (int argc, + char **argv) +{ + unsigned int offset = 0; + bool print_platforms = false; + cl_platform_id platform; + cl_device_id device_id; + cl_context context; + cl_command_queue commands; + cl_program program; + cl_kernel kernel; + + /* TODO: replace by getopt-style command-line parsing ... */ + if ( (NULL != argv[1]) && + (0 == strcmp (argv[1], + "list")) ) + print_platforms = true; + if ( (NULL != argv[1]) && + (0 == strncmp (argv[1], + "platform=", + strlen ("platform="))) ) + offset = atoi (&argv[1][strlen ("platform=")]); + + platform = select_platform (offset, + print_platforms); + device_id = select_device (platform); + context = create_compute_context (device_id); + commands = create_command_queue (device_id, + context); + program = compile_program (device_id, + context, + "square.cl"); + kernel = create_kernel (program, + "square"); + { + unsigned int scale = 1024 * 1024; + float *inputs = malloc (sizeof (float) * scale); + float *squares = malloc (sizeof (float) * scale); + + if ( (NULL == inputs) || + (NULL == squares) ) + { + fprintf (stderr, + "allocation failed (%s)\n", + strerror (errno)); + exit (1); + } + for (unsigned int i = 0; i<scale; i++) + inputs[i] = 1.0 * i + i * 0.01; + for (unsigned int b = 0; b < 10000; b++) + { + square_with_kernel (device_id, + context, + commands, + kernel, + scale, + inputs, + squares); + } + for (unsigned int i = 0; i<scale; i++) + if (squares[i] != (inputs[i] * inputs[i])) + fprintf (stderr, + "Bad computation (%u)\n", + i); + free (inputs); + free (squares); + } + clReleaseKernel (kernel); + clReleaseProgram (program); + clReleaseCommandQueue (commands); + clReleaseContext (context); + return 0; +} diff --git a/source/square.cl b/source/square.cl @@ -0,0 +1 @@ +../xcode/rsa-kernel.cl +\ No newline at end of file diff --git a/xcode/rsa-kernel.cl b/xcode/rsa-kernel.cl @@ -1,9 +1,12 @@ -__kernel void square(__global float* input, __global float* output, const unsigned int count) +__kernel void +square (__global float* input, + __global float* output, + const unsigned int count) { - int i = get_global_id(0); - - if(i < count) - output[i] = input[i] * input[i]; + int i = get_global_id(0); + + if(i < count) + output[i] = input[i] * input[i]; }