commit 9d3cc30d284ec77c1068712886051c517ae2eeb7
parent 2ed17292b5a023671cd688a685e60c60c22b45ce
Author: Cedric <cedric.zwahlen@students.bfh.ch>
Date: Mon, 25 Dec 2023 15:27:05 +0100
Fix Montgomery
Unoptimised version works
Diffstat:
8 files changed, 68 insertions(+), 63 deletions(-)
diff --git a/source/.DS_Store b/source/.DS_Store
Binary files differ.
diff --git a/source/montgomery.c b/source/montgomery.c
@@ -292,10 +292,10 @@ void erase_all( gpu_register *a, size_t n)
void shift_right(gpu_register *r, int n) {
- for (int i = 0; i < R; i++) {
+ for (int i = 0; i < R+1; i++) {
r[i] = r[i + n];
- r[i + n] = 0;
+ //r[i + n] = 0;
}
@@ -305,7 +305,7 @@ void shift_right(gpu_register *r, int n) {
int compare(gpu_register *r, gpu_register *l, int n) {
int x = 0;
- for (int i = 0; i < n; i++) {
+ for (int i = n - 1; i >= 0; i--) {
x = r[i] > l[i];
if (x) return 1;
x = r[i] < l[i];
@@ -335,14 +335,16 @@ void montMul( gpu_register *ret,
multiply(tmp_3,tmp_2,n,R);
- gpu_register i = add(tmp_2,tmp_1,tmp_3,R*2); // MARK: something gets lost in the carry
-
- if (i != 0) printf("non-zero add\n");
+ add(tmp_2,tmp_1,tmp_3,R*2+1); // MARK: something gets lost in the carry
shift_right(tmp_2, R);
- if (compare(tmp_2, n, R) >= 0) {
- subtract(ret, tmp_2, n, R);
+ erase_all(tmp_3, R*2);
+ equal(tmp_3, n, R);
+
+ if (compare(tmp_2, tmp_3, R+1) >= 0) {
+ printf("non-zero add\n");
+ subtract(ret, tmp_2, tmp_3, R+1);
} else {
equal(ret, tmp_2, R);
}
@@ -363,14 +365,16 @@ void montSqr( gpu_register *ret,
multiply(tmp_3,tmp_2,n,R);
- gpu_register i = add(tmp_2,tmp_1,tmp_3,R*2);
-
- if (i != 0) printf("non-zero add\n");
+ add(tmp_2,tmp_1,tmp_3,R*2+1);
shift_right(tmp_2, R);
- if (compare(tmp_2, n, R) >= 0) {
- subtract(ret, tmp_2, n, R);
+ erase_all(tmp_3, R*2);
+ equal(tmp_3, n, R);
+
+ if (compare(tmp_2, tmp_3, R+1) >= 0) {
+ printf("non-zero add\n");
+ subtract(ret, tmp_2, tmp_3, R+1);
} else {
equal(ret, tmp_2, R);
}
diff --git a/source/montmodmult.c b/source/montmodmult.c
@@ -235,9 +235,9 @@ int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state,
state->exp_mem = clCreateBuffer(info->context, CL_MEM_READ_ONLY, n * sizeof(gpu_register), NULL, NULL);
- state->tmp_1 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2, NULL, NULL);
- state->tmp_2 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2, NULL, NULL);
- state->tmp_3 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2, NULL, NULL);
+ state->tmp_1 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2 + sizeof(gpu_register), NULL, NULL);
+ state->tmp_2 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2 + sizeof(gpu_register), NULL, NULL);
+ state->tmp_3 = clCreateBuffer(info->context, CL_MEM_READ_WRITE, len * 2 + sizeof(gpu_register), NULL, NULL);
state->pks_indices = clCreateBuffer(info->context, CL_MEM_READ_ONLY, sizeof(unsigned long) * (pk + 1),NULL, NULL);
@@ -247,15 +247,15 @@ int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state,
exit(1);
}
- void *tmp_1 = malloc(len * 2);
- void *tmp_2 = malloc(len * 2);
- void *tmp_3 = malloc(len * 2);
+ void *tmp_1 = malloc(len * 2 + sizeof(gpu_register));
+ void *tmp_2 = malloc(len * 2 + sizeof(gpu_register));
+ void *tmp_3 = malloc(len * 2 + sizeof(gpu_register));
//void *res = malloc(len);
- memset(tmp_1, 0, len * 2);
- memset(tmp_2, 0, len * 2);
- memset(tmp_3, 0, len * 2);
+ memset(tmp_1, 0, len * 2 + sizeof(gpu_register));
+ memset(tmp_2, 0, len * 2 + sizeof(gpu_register));
+ memset(tmp_3, 0, len * 2 + sizeof(gpu_register));
//memset(res, 0, len);
// Write our data set into the input array in device memory
@@ -269,9 +269,9 @@ int modmult_opencl_prepare(struct gpu_info *info, struct gpu_state_alt *state,
err |= clEnqueueWriteBuffer(info->commands, state->exp_mem, CL_TRUE, 0, n * sizeof(gpu_register), exp_buf, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->tmp_1, CL_TRUE, 0, len * 2, tmp_1, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->tmp_2, CL_TRUE, 0, len * 2, tmp_2, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(info->commands, state->tmp_3, CL_TRUE, 0, len * 2, tmp_3, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(info->commands, state->tmp_1, CL_TRUE, 0, len * 2 + sizeof(gpu_register), tmp_1, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(info->commands, state->tmp_2, CL_TRUE, 0, len * 2 + sizeof(gpu_register), tmp_2, 0, NULL, NULL);
+ err |= clEnqueueWriteBuffer(info->commands, state->tmp_3, CL_TRUE, 0, len * 2 + sizeof(gpu_register), tmp_3, 0, NULL, NULL);
err |= clEnqueueWriteBuffer(info->commands, state->pks_indices, CL_TRUE, 0, sizeof(unsigned long) * (pk + 1), pks, 0, NULL, NULL);
if (err != CL_SUCCESS)
@@ -479,6 +479,7 @@ void modmult_opencl_release(struct gpu_state_alt *state) {
clReleaseMemObject(state->tmp_1);
clReleaseMemObject(state->tmp_2);
+ clReleaseMemObject(state->tmp_3);
clReleaseMemObject(state->pks_indices);
}
@@ -537,15 +538,15 @@ void modmult_gpu_execute(struct gpu_info *info, struct gpu_state_alt *state,
void *res = malloc(len);
- void *tmp_1 = malloc(len * 2);
- void *tmp_2 = malloc(len * 2);
- void *tmp_3 = malloc(len * 2);
+ void *tmp_1 = malloc(len * 2 + sizeof(gpu_register)); // double-size to hold a multiplication result, plus one extra word for the worst case of adding two double-size values
+ void *tmp_2 = malloc(len * 2 + sizeof(gpu_register));
+ void *tmp_3 = malloc(len * 2 + sizeof(gpu_register));
//void *res = malloc(len);
- memset(tmp_1, 0, len * 2);
- memset(tmp_2, 0, len * 2);
- memset(tmp_3, 0, len * 2);
+ memset(tmp_1, 0, len * 2 + sizeof(gpu_register));
+ memset(tmp_2, 0, len * 2 + sizeof(gpu_register));
+ memset(tmp_3, 0, len * 2 + sizeof(gpu_register));
memset(res, 0, len);
diff --git a/xcode/.DS_Store b/xcode/.DS_Store
Binary files differ.
diff --git a/xcode/lib-gpu-generate/msgsig.txt b/xcode/lib-gpu-generate/msgsig.txt
@@ -1,2 +1,2 @@
-672CF1AA50D19C0B
-041605674572DEFC
+2472447B46D075CA
+083C9D3EF9FBB87346BF102F9C713790A182E54F4128ABA6363EB42AB259C7BBF04B815784EF231A596C7281FC9E62CE85577BBE863C0306303747B5A72F23C109B52B658CE5978B601A880913757C3CCEC3E4DB0ACA30487366F51767CA5FD5743C2D4A894FC275F0823CC34E240769A78114CE804792C6C69DAFBF085E12BF0D7FCBE4CE114C688108EDA1586358E4DB10BCAC063E5BB1D6DDC51B7251E2EA9C6FBB33002DED77269DE3D331B0FB980449F28E745EDAAE4035D956670BA72D24F84611797693F12CE7C7EB25940BC5186EB0D4D7C6AEDC67DEE6B087BE288F2605FC645AC9D48ED54B0CB1F41EB6347EE4A086D120EF26FEA4A4217AAAC2AC
diff --git a/xcode/lib-gpu-generate/publickey.txt b/xcode/lib-gpu-generate/publickey.txt
@@ -1,3 +1,3 @@
-BDFD5533442211
-34

+010001
0
diff --git a/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate b/xcode/lib-gpu-verify.xcodeproj/project.xcworkspace/xcuserdata/cedriczwahlen.xcuserdatad/UserInterfaceState.xcuserstate
Binary files differ.
diff --git a/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/xcode/lib-gpu-verify.xcodeproj/xcuserdata/cedriczwahlen.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist
@@ -3227,8 +3227,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "601"
- endingLineNumber = "601"
+ startingLineNumber = "602"
+ endingLineNumber = "602"
landmarkName = "mont_modmult_tests()"
landmarkType = "9">
<Locations>
@@ -3291,8 +3291,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "651"
- endingLineNumber = "651"
+ startingLineNumber = "652"
+ endingLineNumber = "652"
landmarkName = "mont_modmult_tests()"
landmarkType = "9">
<Locations>
@@ -4004,8 +4004,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "575"
- endingLineNumber = "575"
+ startingLineNumber = "576"
+ endingLineNumber = "576"
landmarkName = "modmult_gpu_execute(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, mod_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4833,8 +4833,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "562"
- endingLineNumber = "562"
+ startingLineNumber = "563"
+ endingLineNumber = "563"
landmarkName = "modmult_gpu_execute(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, mod_buf, s_buf, pks, n)"
landmarkType = "9">
<Locations>
@@ -4926,8 +4926,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "390"
- endingLineNumber = "390"
+ startingLineNumber = "394"
+ endingLineNumber = "394"
landmarkName = "mont(x, m, res, n, ni, exp, tmp_1, tmp_2, tmp_3, pks)"
landmarkType = "9">
</BreakpointContent>
@@ -4942,8 +4942,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "358"
- endingLineNumber = "358"
+ startingLineNumber = "360"
+ endingLineNumber = "360"
landmarkName = "montSqr(ret, a, ni, n, tmp_1, tmp_2, tmp_3)"
landmarkType = "9">
<Locations>
@@ -5051,8 +5051,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "366"
- endingLineNumber = "366"
+ startingLineNumber = "368"
+ endingLineNumber = "368"
landmarkName = "montSqr(ret, a, ni, n, tmp_1, tmp_2, tmp_3)"
landmarkType = "9">
<Locations>
@@ -5219,8 +5219,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "432"
- endingLineNumber = "432"
+ startingLineNumber = "436"
+ endingLineNumber = "436"
landmarkName = "mont(x, m, res, n, ni, exp, tmp_1, tmp_2, tmp_3, pks)"
landmarkType = "9">
<Locations>
@@ -5372,8 +5372,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "411"
- endingLineNumber = "411"
+ startingLineNumber = "415"
+ endingLineNumber = "415"
landmarkName = "mont(x, m, res, n, ni, exp, tmp_1, tmp_2, tmp_3, pks)"
landmarkType = "9">
<Locations>
@@ -5525,8 +5525,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "415"
- endingLineNumber = "415"
+ startingLineNumber = "419"
+ endingLineNumber = "419"
landmarkName = "mont(x, m, res, n, ni, exp, tmp_1, tmp_2, tmp_3, pks)"
landmarkType = "9">
</BreakpointContent>
@@ -5701,8 +5701,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "644"
- endingLineNumber = "644"
+ startingLineNumber = "645"
+ endingLineNumber = "645"
landmarkName = "mont_modmult_tests()"
landmarkType = "9">
</BreakpointContent>
@@ -5717,8 +5717,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "534"
- endingLineNumber = "534"
+ startingLineNumber = "535"
+ endingLineNumber = "535"
landmarkName = "modmult_gpu_execute(info, state, x_buf, m_buf, r_1_buf, n_buf, ni_buf, msg_buf, exp_buf, mod_buf, s_buf, pks, n)"
landmarkType = "9">
</BreakpointContent>
@@ -5733,8 +5733,8 @@
filePath = "../source/montmodmult.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "605"
- endingLineNumber = "605"
+ startingLineNumber = "606"
+ endingLineNumber = "606"
landmarkName = "mont_modmult_tests()"
landmarkType = "9">
</BreakpointContent>
@@ -5765,8 +5765,8 @@
filePath = "../source/montgomery.c"
startingColumnNumber = "9223372036854775807"
endingColumnNumber = "9223372036854775807"
- startingLineNumber = "420"
- endingLineNumber = "420"
+ startingLineNumber = "424"
+ endingLineNumber = "424"
landmarkName = "mont(x, m, res, n, ni, exp, tmp_1, tmp_2, tmp_3, pks)"
landmarkType = "9">
<Locations>