forked from pallas1/ccminer-bmw512
-
Notifications
You must be signed in to change notification settings - Fork 1
/
skein2.cpp
149 lines (113 loc) · 3.93 KB
/
skein2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
* SKEIN512 80 + SKEIN512 64 (Woodcoin)
* by tpruvot@github - 2015
*/
#include <string.h>
#include "sph/sph_skein.h"
#include "miner.h"
#include "cuda_helper.h"
static uint32_t *d_hash[MAX_GPUS];
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
void skein2hash(void *output, const void *input)
{
uint32_t _ALIGN(64) hash[16];
sph_skein512_context ctx_skein;
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, input, 80);
sph_skein512_close(&ctx_skein, hash);
sph_skein512_init(&ctx_skein);
sph_skein512(&ctx_skein, hash, 64);
sph_skein512_close(&ctx_skein, hash);
memcpy(output, (void*) hash, 32);
}
static bool init[MAX_GPUS] = { 0 };
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
int dev_id = device_map[thr_id];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 256*256*8
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0;
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);
quark_skein512_cpu_init(thr_id, throughput);
cuda_check_cpu_init(thr_id, throughput);
CUDA_SAFE_CALL(cudaDeviceSynchronize());
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 19; k++)
be32enc(&endiandata[k], pdata[k]);
skein512_cpu_setBlock_80((void*)endiandata);
cuda_check_cpu_setTarget(ptarget);
do {
int order = 0;
// Hash with CUDA
skein512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput;
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
if (work->nonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash[8];
endiandata[19] = swab32(work->nonces[0]);
skein2hash(vhash, endiandata);
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
work_set_target_ratio(work, vhash);
if (work->nonces[1] != 0) {
endiandata[19] = swab32(work->nonces[1]);
skein2hash(vhash, endiandata);
work->valid_nonces++;
bn_set_target_ratio(work, vhash, 1);
gpulog(LOG_DEBUG, thr_id, "found second nonce %08x!", endiandata[19]);
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;
} else {
pdata[19] = work->nonces[0] + 1; // cursor for next scan
}
return work->valid_nonces;
}
else if (vhash[7] > ptarget[7]) {
gpu_increment_reject(thr_id);
if (!opt_quiet)
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]);
pdata[19] = work->nonces[0] + 1;
continue;
}
}
if ((uint64_t) throughput + pdata[19] >= max_nonce) {
pdata[19] = max_nonce;
break;
}
pdata[19] += throughput;
} while (!work_restart[thr_id].restart);
*hashes_done = pdata[19] - first_nonce;
return 0;
}
// cleanup
void free_skein2(int thr_id)
{
if (!init[thr_id])
return;
cudaThreadSynchronize();
cudaFree(d_hash[thr_id]);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();
}