-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.cpp
435 lines (337 loc) · 12.3 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
#include <stdio.h>
#include <time.h>
#include <set>
#include <thread>
#include <list>
#include <mutex>
#include <iostream>
#ifdef USE_GMP
#include <gmp.h>
#else //USE_GMP
#include "mpir.h"
#endif //USE_GMP
#include "targets.h"
#include "ActiveSocket.h"
//#include "vld.h"
static const char *VERSION = "0.8-opencl";
void leftmost_bit(mpz_t &out, const mpz_t &x)
{
//assert(mpz_cmp_ui(x, 0) > 0);
mpz_t r;
mpz_t r2;
mpz_init(r);
mpz_set_ui(r, 1);
mpz_init(r2);
while(mpz_cmp(r, x) <= 0)
{
mpz_mul_ui(r2, r, 2);
mpz_set(r, r2);
}
mpz_div_ui(out, r, 2);
mpz_clear(r2);
mpz_clear(r);
}
// horrible raw cpp includes! I'm so lazy. deal with it.
#include "OpenCLFramework.h"
#include "result.h"
#include "curvefp.h"
#include "point.h"
static std::mutex countMutex;
static std::mutex resultMutex;
static std::mutex statsMutex;
static std::list<Result> resultList;
static unsigned int tryCount = 0;
static unsigned int resultCount = 0;
static unsigned int duplicateCount = 0;
static unsigned int rejectCount = 0;
static unsigned int queuedCount = 0;
void work()
{
/**
Evil-Knievel: Before starting OpenCL we must get the starting point, these methods are 1:1 from gh2k's port!
**/
static const unsigned int tries = 100;
uint64 seed64 = (uint64)time(0) << 32 | static_cast<uint32>(std::hash<std::thread::id>()(std::this_thread::get_id()));
uint32 seed32 = static_cast<uint32>(time(0)) ^ static_cast<uint32>(std::hash<std::thread::id>()(std::this_thread::get_id()));
srand(seed32);
std::set<uint32> targets;
initTargets(targets);
mpz_t seed;
mpz_init(seed);
mpz_set_ui(seed, seed64);
gmp_randstate_t randstate;
gmp_randinit_mt(randstate);
gmp_randseed(randstate, seed);
mpz_t p;
mpz_t b;
mpz_t a;
mpz_t bitmask_32;
mpz_t temp_integer;
mpz_init(a);
mpz_set_ui(a, 0);
mpz_init(b);
mpz_set_ui(b, 7);
mpz_init(p);
mpz_set_str(p, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", 16);
mpz_init(bitmask_32);
mpz_set_str(bitmask_32, "FFFFFFFF", 16);
mpz_init(temp_integer);
mpz_t gx;
mpz_t gy;
mpz_t r;
mpz_init(gx);
mpz_init(gy);
mpz_init(r);
mpz_set_str(gx, "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", 16);
mpz_set_str(gy, "483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8", 16);
mpz_set_str(r, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16);
CurveFp curve(p, a, b);
Point point(curve, gx, gy, r);
Point g(point);
mpz_t priv;
mpz_init(priv);
mpz_urandomb(priv, randstate, 256);
//mpz_set_str(priv, "1", 16);
mpz_t temppriv;
mpz_init_set(temppriv, priv);
Point myp = g*priv;
/**
Evil-Knievel: Create all OpenCL Related Entities here. We should think about moving it to OpenCLFramework.cpp
but I am a C++ noob so I make it quick and dirty.
**/
// Create Function Instance
OpenCLFramework clFunctions;
// open context
cl::Context context(CL_DEVICE_TYPE_GPU);
std::vector<cl::Device> devices= context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << "(opencl) will use the following device: " << devices[0].getInfo<CL_DEVICE_NAME>() << std::endl;
//Make a queue to put jobs on the first compute device
cl::CommandQueue queue(context, devices[0]);
// create kernel from cl file
cl::Program program = clFunctions.buildProgramFromSource(&context, "kernel/evilknievel.cl", "");
// Host Memory Stuff that will be mapped to a cl::Buffer somewhere
bignum points_in[2]; // <------ here goes the starting point, e.g. random myp point
//bignum points_found[2];
cl_uint found_cell_index = 0;
cl_uint found = 0;
// One time initialization of points_in from myp! (ATTENTION: THIS IS A REALLY SHITTY DATA STRUCTURE HERE
mpz_t shifter;
mpz_init(shifter);
mpz_t holder;
mpz_init(holder);
mpz_set(shifter, myp.x());
for (int i = 0; i < 8; ++i){
mpz_and(holder, shifter, bitmask_32);
points_in[0].d[i] = (cl_uint)mpz_get_ui(holder);
mpz_fdiv_q_2exp(shifter, shifter, 32);
}
mpz_set(shifter, myp.y());
for (int i = 0; i < 8; ++i){
mpz_and(holder, shifter, bitmask_32);
points_in[1].d[i] = (cl_uint)mpz_get_ui(holder);
mpz_fdiv_q_2exp(shifter, shifter, 32);
}
mpz_clear(holder);
mpz_clear(shifter);
// create buffers for CL Device
cl::Buffer found_buffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_uint)* 1);
cl::Buffer found_cell(context, CL_MEM_READ_WRITE, sizeof(cl_uint)* 1);
cl::Buffer points_in_buffer(context, CL_MEM_READ_WRITE /* | CL_MEM_COPY_HOST_PTR */, sizeof(bignum)* 2);
//cl::Buffer found_point(context, CL_MEM_WRITE_ONLY /* | CL_MEM_COPY_HOST_PTR */, sizeof(bignum)* 2);
// Write necessary buffers
queue.enqueueWriteBuffer(found_cell, CL_TRUE, 0, sizeof(cl_uint)* 1, &found_cell_index);
queue.enqueueWriteBuffer(points_in_buffer, CL_TRUE, 0, sizeof(bignum)* 2, points_in);
// Prepare Opencl Kernel for execution
cl::Kernel kernel = cl::Kernel(program, "search_triplets");
kernel.setArg(0, found_buffer);
kernel.setArg(1, points_in_buffer);
//kernel.setArg(2, found_point);
kernel.setArg(2, found_cell);
unsigned int i_global_work_size = 8192; // <------- these are hardcoded, maybe we need some autodetection like cgminer does !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// IF YOU CHANGE THIS ALSO CHANGE THE 81919 AT THE END OF THE KERNEL FILE TO THE NEW VALUE-1 !!!!!!! <---------------- VERY VERY IMPORTANT
unsigned int i_local_work_size = 256; // <------- these are hardcoded, maybe we need some autodetection like cgminer does !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
/* it is ugly to create this mpz_number here, but we have to do this after setting global work size*/
mpz_t work_size_incrementor;
mpz_init(work_size_incrementor);
mpz_set_ui(work_size_incrementor, i_global_work_size);
// init a fixed point to be added each iteration
Point g_worksize_increment = g*work_size_incrementor;
const cl::NDRange global_work_offset(0, 0);
const cl::NDRange global_work_size(i_global_work_size, 1, 1);
const cl::NDRange local_work_size(i_local_work_size, 1, 1);
int test = 1;
while( true )
{
// Launch the kernel and do 81920 parallel calculations
int ret = queue.enqueueNDRangeKernel(kernel, global_work_offset, global_work_size, local_work_size, NULL, NULL);
if (ret != CL_SUCCESS){
std::cerr << "clEnqueueNDRangeKernel fail. Error " << ret << std::endl;
if (ret == CL_OUT_OF_RESOURCES){
std::cerr << "Kernel out of resources!!! Try lowering global/local worksize in the source code! You GPU seems to be 'low end'" << std::endl;
}
}
// Now check if the kernel reported a found point back
queue.enqueueReadBuffer(found_buffer, CL_TRUE, 0, sizeof(cl_int)* 1, &found);
if (found > 0){
// Yes, the GPU actually did find a value triplet.
// Now let us read the result from the GPU!!!
// WARNING: AT THE MOMENT THE GPU CAN ONLY FIND ONE TRIPLET PER KERNEL/RUN and we have only one found and one points_found variable shared with the kernel
queue.enqueueReadBuffer(found_cell, CL_TRUE, 0, sizeof(cl_uint)* 1, &found_cell_index); // <------ found index is the thread number which actually found the triplet
// (RATHER THAN GETTING THIS INFO FROM THE GPU, I TRY TO CALC THE X,Y A FEW LINES BELOW THIS ON MY OWN! NOT SURE IF ITS THE BEST IDEA ....... queue.enqueueReadBuffer(found_point, CL_TRUE, 0, sizeof(bignum)* 2, points_found); // <----- and here the found x and y are given back
found = 0; // reset found
queue.enqueueWriteBuffer(found_buffer, CL_TRUE, 0, sizeof(cl_int)* 1, &found); // also reset found variable on GPU memory
/**
At this point we can reconstruct the triplet from points_found[0], points_found[1] and temppriv!!!
in face we dont take points_found for now but calculate the found point from myp and temppriv and found_cell_index
**/
// IMPORTANT!! THIS HERE IS VERY INEFFICIENT, MAYBE YOU GUYS HAVE A BETTER IDEA!
//std::cout << "FOUND CELL_INDEX: " << (test+found_cell_index) << std::endl;
Point myp_found = myp;
mpz_t found_cell_index_mpz;
mpz_init(found_cell_index_mpz);
mpz_set_ui(found_cell_index_mpz, found_cell_index);
Point adding_found_index_offset = g*found_cell_index_mpz;
myp_found.add(adding_found_index_offset);
mpz_add(temppriv, temppriv, found_cell_index_mpz);
resultMutex.lock();
resultList.push_back(Result(myp_found.x(), myp_found.y(), temppriv));
resultMutex.unlock();
mpz_sub(temppriv, temppriv, found_cell_index_mpz); // revert temppriv as it is going to be incremented at the end of the loop anyway
mpz_clear(found_cell_index_mpz);
}
// Before doing anything else, increment temppriv by the number of iterations we have done so far!
mpz_add(temppriv, temppriv, work_size_incrementor);
// and also update our starting point here so we keep track how far we moved already
myp.add(g_worksize_increment);
countMutex.lock();
test += i_global_work_size;
tryCount += i_global_work_size; // We have actually did so many tries as the global_work_size says as we are having this many parallel threads on the GPU
countMutex.unlock();
/**
resultMutex.lock();
resultList.push_back(Result(myp.x(), myp.y(), temppriv));
resultMutex.unlock();
countMutex.lock();
tryCount += tries;
countMutex.unlock();**/
}
mpz_clear(work_size_incrementor);
mpz_clear(gx);
mpz_clear(gy);
mpz_clear(r);
mpz_clear(p);
mpz_clear(b);
mpz_clear(a);
mpz_clear(priv);
mpz_clear(temppriv);
mpz_clear(seed);
mpz_clear(bitmask_32);
mpz_clear(temp_integer);
}
int submit_share(const std::string &btc, const std::string &result)
{
std::string outbuf;
outbuf += "result ";
outbuf += btc;
outbuf += " ";
outbuf += result;
outbuf += "\n";
CActiveSocket socket;
socket.Initialize();
if (!socket.Open((const uint8 *)"stargate.bitwarrant.com", 4444))
//if (!socket.Open((const uint8 *)"localhost", 4444))
return -1;
if (!socket.Send((const uint8 *)outbuf.data(), outbuf.size()))
return -1;
int r = 0;
char resp[1024] = {};
if (socket.Receive(1024) > 0)
{
memcpy(resp, socket.GetData(), socket.GetBytesReceived());
if (strstr(resp, "submission is great"))
r = 1;
if (strstr(resp, "submission is already here"))
r = 2;
}
socket.Close();
return r;
}
void submitter(const std::string btc)
{
std::list<Result> queuedResults;
std::chrono::milliseconds sleeptime( 100 );
while( true )
{
resultMutex.lock();
queuedResults.splice(queuedResults.end(), resultList);
resultMutex.unlock();
std::list<Result> heldResults;
if (queuedResults.size())
{
Result nextResult(queuedResults.front());
queuedResults.pop_front();
std::string rs(nextResult.to_s());
int r = submit_share(btc, rs);
statsMutex.lock();
switch (r)
{
case -1:
heldResults.push_back(nextResult);
break;
case 1:
++ resultCount;
break;
case 0:
++ rejectCount;
break;
case 2:
++ duplicateCount;
break;
}
queuedCount = static_cast<unsigned int>(queuedResults.size() + heldResults.size());
statsMutex.unlock();
queuedResults.splice(queuedResults.end(), heldResults);
} else {
std::this_thread::sleep_for( sleeptime );
}
}
}
int main(int argc, char **argv)
{
if ( argc != 2 )
{
printf("Usage: %s bitcoinaddress\n", argv[0]);
return -1;
}
std::string btc(argv[1]);
std::list<Result> held_results;
printf("C++ BTC address space analyzer by gh2k (modified to support OpenCL), v%s\n", VERSION);
puts("Tips: 1gh2k13JzLTxDz2vmo8RH7HcjTLLg5kdc ;-)");
puts("");
printf("Starting OpenCL kernel. Hang on to your hats...\n");
printf("Sending payouts to %s\n", btc.data());
std::list<std::thread *> threads;
// EVILKNIEVEL: For threads only one worker thread, as the parallelization will be done on the GPU itself!
threads.push_back(new std::thread(work));
threads.push_back(new std::thread(submitter, btc));
std::chrono::milliseconds sleeptime( 100 );
time_t secs = time(0);
time_t starttime = time(0);
while( true )
{
std::this_thread::sleep_for( sleeptime );
time_t curtime = time(0);
if (curtime > secs + 10)
{
secs = curtime;
countMutex.lock();
unsigned int curtries = tryCount;
tryCount = 0;
countMutex.unlock();
statsMutex.lock();
double resultsPerHour = static_cast<double>(resultCount) / (static_cast<double>(curtime - starttime) / 60.0 / 60.0);
printf("%d keys/sec - %d accepted (%.2f / hour) - %d rej, %d dup, %d que\n", curtries / 10, resultCount, resultsPerHour, rejectCount, duplicateCount, queuedCount);
statsMutex.unlock();
}
}
}