-
Notifications
You must be signed in to change notification settings - Fork 3
/
sha3.cu
298 lines (247 loc) · 9.46 KB
/
sha3.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/* -------------------------------------------------------------------------
* Works when compiled for either 32-bit or 64-bit targets, optimized for
* 64 bit.
*
* Canonical implementation of Init/Update/Finalize for SHA-3 byte input.
*
* SHA3-256, SHA3-384, SHA-512 are implemented. SHA-224 can easily be added.
*
* Based on code from http://keccak.noekeon.org/ .
*
* I place the code that I wrote into public domain, free to use.
*
* I would appreciate if you give credits to this work if you used it to
* write or test * your code.
*
* Aug 2015. Andrey Jivsov. [email protected]
* ---------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "sha3.cuh"
#define SHA3_ASSERT(x)
#define SHA3_TRACE(format, ...)
#define SHA3_TRACE_BUF(format, buf, l)
/*
* This flag is used to configure "pure" Keccak, as opposed to NIST SHA3.
*/
#define SHA3_USE_KECCAK_FLAG 0x80000000
#define SHA3_CW(x) ((x) & (~SHA3_USE_KECCAK_FLAG))
#if defined(_MSC_VER)
#define SHA3_CONST(x) x
#else
#define SHA3_CONST(x) x##L
#endif
#ifndef SHA3_ROTL64
#define SHA3_ROTL64(x, y) \
(((x) << (y)) | ((x) >> ((sizeof(uint64_t) * 8) - (y))))
#endif
__device__ static const uint64_t keccakf_rndc[24] = {
SHA3_CONST(0x0000000000000001UL), SHA3_CONST(0x0000000000008082UL),
SHA3_CONST(0x800000000000808aUL), SHA3_CONST(0x8000000080008000UL),
SHA3_CONST(0x000000000000808bUL), SHA3_CONST(0x0000000080000001UL),
SHA3_CONST(0x8000000080008081UL), SHA3_CONST(0x8000000000008009UL),
SHA3_CONST(0x000000000000008aUL), SHA3_CONST(0x0000000000000088UL),
SHA3_CONST(0x0000000080008009UL), SHA3_CONST(0x000000008000000aUL),
SHA3_CONST(0x000000008000808bUL), SHA3_CONST(0x800000000000008bUL),
SHA3_CONST(0x8000000000008089UL), SHA3_CONST(0x8000000000008003UL),
SHA3_CONST(0x8000000000008002UL), SHA3_CONST(0x8000000000000080UL),
SHA3_CONST(0x000000000000800aUL), SHA3_CONST(0x800000008000000aUL),
SHA3_CONST(0x8000000080008081UL), SHA3_CONST(0x8000000000008080UL),
SHA3_CONST(0x0000000080000001UL), SHA3_CONST(0x8000000080008008UL)};
__device__ static const unsigned keccakf_rotc[24] = {
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44};
__device__ static const unsigned keccakf_piln[24] = {
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1};
/* generally called after SHA3_KECCAK_SPONGE_WORDS-ctx->capacityWords words
* are XORed into the state s
*/
__device__ static void keccakf(uint64_t s[25]) {
int i, j, round;
uint64_t t, bc[5];
#define KECCAK_ROUNDS 24
for (round = 0; round < KECCAK_ROUNDS; round++) {
/* Theta */
for (i = 0; i < 5; i++)
bc[i] = s[i] ^ s[i + 5] ^ s[i + 10] ^ s[i + 15] ^ s[i + 20];
for (i = 0; i < 5; i++) {
t = bc[(i + 4) % 5] ^ SHA3_ROTL64(bc[(i + 1) % 5], 1);
for (j = 0; j < 25; j += 5) s[j + i] ^= t;
}
/* Rho Pi */
t = s[1];
for (i = 0; i < 24; i++) {
j = keccakf_piln[i];
bc[0] = s[j];
s[j] = SHA3_ROTL64(t, keccakf_rotc[i]);
t = bc[0];
}
/* Chi */
for (j = 0; j < 25; j += 5) {
for (i = 0; i < 5; i++) bc[i] = s[j + i];
for (i = 0; i < 5; i++)
s[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
}
/* Iota */
s[0] ^= keccakf_rndc[round];
}
}
/* *************************** Public Inteface ************************ */
/* For Init or Reset call these: */
__device__ sha3_return_t sha3_Init(void *priv, unsigned bitSize) {
sha3_context *ctx = (sha3_context *)priv;
if (bitSize != 256 && bitSize != 384 && bitSize != 512)
return SHA3_RETURN_BAD_PARAMS;
memset(ctx, 0, sizeof(*ctx));
ctx->capacityWords = 2 * bitSize / (8 * sizeof(uint64_t));
return SHA3_RETURN_OK;
}
__device__ void sha3_Init256(void *priv) { sha3_Init(priv, 256); }
__device__ void sha3_Init384(void *priv) { sha3_Init(priv, 384); }
__device__ void sha3_Init512(void *priv) { sha3_Init(priv, 512); }
// enum SHA3_FLAGS {
// SHA3_FLAGS_NONE=0,
// SHA3_FLAGS_KECCAK=1
// };
__device__ enum SHA3_FLAGS sha3_SetFlags(void *priv, enum SHA3_FLAGS flags) {
sha3_context *ctx = (sha3_context *)priv;
flags = SHA3_FLAGS_KECCAK;
ctx->capacityWords |=
(flags == SHA3_FLAGS_KECCAK ? SHA3_USE_KECCAK_FLAG : 0);
return flags;
}
__device__ void sha3_Update(void *priv, void const *bufIn, size_t len) {
sha3_context *ctx = (sha3_context *)priv;
/* 0...7 -- how much is needed to have a word */
unsigned old_tail = (8 - ctx->byteIndex) & 7;
size_t words;
unsigned tail;
size_t i;
const uint8_t *buf = (const uint8_t *)bufIn;
SHA3_TRACE_BUF("called to update with:", buf, len);
SHA3_ASSERT(ctx->byteIndex < 8);
SHA3_ASSERT(ctx->wordIndex < sizeof(ctx->u.s) / sizeof(ctx->u.s[0]));
if (len < old_tail) { /* have no complete word or haven't started
* the word yet */
SHA3_TRACE("because %d<%d, store it and return", (unsigned)len,
(unsigned)old_tail);
/* endian-independent code follows: */
while (len--)
ctx->saved |= (uint64_t)(*(buf++)) << ((ctx->byteIndex++) * 8);
SHA3_ASSERT(ctx->byteIndex < 8);
return;
}
if (old_tail) { /* will have one word to process */
SHA3_TRACE("completing one word with %d bytes", (unsigned)old_tail);
/* endian-independent code follows: */
len -= old_tail;
while (old_tail--)
ctx->saved |= (uint64_t)(*(buf++)) << ((ctx->byteIndex++) * 8);
/* now ready to add saved to the sponge */
ctx->u.s[ctx->wordIndex] ^= ctx->saved;
SHA3_ASSERT(ctx->byteIndex == 8);
ctx->byteIndex = 0;
ctx->saved = 0;
if (++ctx->wordIndex ==
(SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) {
keccakf(ctx->u.s);
ctx->wordIndex = 0;
}
}
/* now work in full words directly from input */
SHA3_ASSERT(ctx->byteIndex == 0);
words = len / sizeof(uint64_t);
tail = len - words * sizeof(uint64_t);
SHA3_TRACE("have %d full words to process", (unsigned)words);
for (i = 0; i < words; i++, buf += sizeof(uint64_t)) {
const uint64_t t =
(uint64_t)(buf[0]) | ((uint64_t)(buf[1]) << 8 * 1) |
((uint64_t)(buf[2]) << 8 * 2) | ((uint64_t)(buf[3]) << 8 * 3) |
((uint64_t)(buf[4]) << 8 * 4) | ((uint64_t)(buf[5]) << 8 * 5) |
((uint64_t)(buf[6]) << 8 * 6) | ((uint64_t)(buf[7]) << 8 * 7);
#if defined(__x86_64__) || defined(__i386__)
SHA3_ASSERT(memcmp(&t, buf, 8) == 0);
#endif
ctx->u.s[ctx->wordIndex] ^= t;
if (++ctx->wordIndex ==
(SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords))) {
keccakf(ctx->u.s);
ctx->wordIndex = 0;
}
}
SHA3_TRACE("have %d bytes left to process, save them", (unsigned)tail);
/* finally, save the partial word */
SHA3_ASSERT(ctx->byteIndex == 0 && tail < 8);
while (tail--) {
SHA3_TRACE("Store byte %02x '%c'", *buf, *buf);
ctx->saved |= (uint64_t)(*(buf++)) << ((ctx->byteIndex++) * 8);
}
SHA3_ASSERT(ctx->byteIndex < 8);
SHA3_TRACE("Have saved=0x%016" PRIx64 " at the end", ctx->saved);
}
/* This is simply the 'update' with the padding block.
* The padding block is 0x01 || 0x00* || 0x80. First 0x01 and last 0x80
* bytes are always present, but they can be the same byte.
*/
__device__ void const *sha3_Finalize(void *priv) {
sha3_context *ctx = (sha3_context *)priv;
SHA3_TRACE("called with %d bytes in the buffer", ctx->byteIndex);
/* Append 2-bit suffix 01, per SHA-3 spec. Instead of 1 for padding we
* use 1<<2 below. The 0x02 below corresponds to the suffix 01.
* Overall, we feed 0, then 1, and finally 1 to start padding. Without
* M || 01, we would simply use 1 to start padding. */
uint64_t t;
if (ctx->capacityWords & SHA3_USE_KECCAK_FLAG) {
/* Keccak version */
t = (uint64_t)(((uint64_t)1) << (ctx->byteIndex * 8));
} else {
/* SHA3 version */
t = (uint64_t)(((uint64_t)(0x02 | (1 << 2))) << ((ctx->byteIndex) * 8));
}
ctx->u.s[ctx->wordIndex] ^= ctx->saved ^ t;
ctx->u.s[SHA3_KECCAK_SPONGE_WORDS - SHA3_CW(ctx->capacityWords) - 1] ^=
SHA3_CONST(0x8000000000000000UL);
keccakf(ctx->u.s);
/* Return first bytes of the ctx->s. This conversion is not needed for
* little-endian platforms e.g. wrap with #if !defined(__BYTE_ORDER__)
* || !defined(__ORDER_LITTLE_ENDIAN__) ||
* __BYTE_ORDER__!=__ORDER_LITTLE_ENDIAN__
* ... the conversion below ...
* #endif */
{
unsigned i;
for (i = 0; i < SHA3_KECCAK_SPONGE_WORDS; i++) {
const unsigned t1 = (uint32_t)ctx->u.s[i];
const unsigned t2 = (uint32_t)((ctx->u.s[i] >> 16) >> 16);
ctx->u.sb[i * 8 + 0] = (uint8_t)(t1);
ctx->u.sb[i * 8 + 1] = (uint8_t)(t1 >> 8);
ctx->u.sb[i * 8 + 2] = (uint8_t)(t1 >> 16);
ctx->u.sb[i * 8 + 3] = (uint8_t)(t1 >> 24);
ctx->u.sb[i * 8 + 4] = (uint8_t)(t2);
ctx->u.sb[i * 8 + 5] = (uint8_t)(t2 >> 8);
ctx->u.sb[i * 8 + 6] = (uint8_t)(t2 >> 16);
ctx->u.sb[i * 8 + 7] = (uint8_t)(t2 >> 24);
}
}
SHA3_TRACE_BUF("Hash: (first 32 bytes)", ctx->u.sb, 256 / 8);
return (ctx->u.sb);
}
__device__ sha3_return_t sha3_HashBuffer(unsigned bitSize,
enum SHA3_FLAGS flags, const void *in,
unsigned inBytes, void *out,
unsigned outBytes) {
sha3_return_t err;
sha3_context c;
err = sha3_Init(&c, bitSize);
if (err != SHA3_RETURN_OK) return err;
if (sha3_SetFlags(&c, flags) != flags) {
return SHA3_RETURN_BAD_PARAMS;
}
sha3_Update(&c, in, inBytes);
const void *h = sha3_Finalize(&c);
if (outBytes > bitSize / 8) outBytes = bitSize / 8;
memcpy(out, h, outBytes);
return SHA3_RETURN_OK;
}