Skip to content

Commit

Permalink
Merge #891: [PoW][RandomX] Cherry picked fixes from offical randomx repo
Browse files Browse the repository at this point in the history
db1b07e Cherry picked commit 'replace hardcoded literal with its appropriate symbol' from tevador/RandomX@a7733de (codeofalltrades)
94ce75a Cherry picked commit 'Hide tempHash from the public API' from tevador/RandomX@3910d49 (codeofalltrades)
9896ed8 Cherry picked commit 'Combined hash and fill AES loop' from tevador/RandomX@219c02e (codeofalltrades)
f737eb6 Cherry picked commit 'Fix inconsistent class/struct usage' from tevador/RandomX@57545d1 (codeofalltrades)
d3d0e07 Cherry picked commit 'Fix symbol collisions with blake2b' from tevador/RandomX@7567cef (codeofalltrades)
9b68964 Cherry picked change from tevador/RandomX@9a5d504 (codeofalltrades)

Pull request description:

  ### Problem ###
  Keep randomx code current

  ### Solution ###
  Cherry picked fixes from offical randomx repo
  Fix alignment for ARMv8 code  - tevador/RandomX@9a5d504
  Fix inconsistent class/struct usage  - tevador/RandomX@57545d1
  Combined hash and fill AES loop - tevador/RandomX@219c02e
  Hide tempHash from the public API  - tevador/RandomX@3910d49
  Replace hardcoded literal with its appropriate symbol  - tevador/RandomX@a7733de
  Fix symbol collisions with blake2b - tevador/RandomX@7567cef

  ### Unit Testing Results ###
  Start the wallet
  Start mining randomx
  Verify you can spend mined coins

Tree-SHA512: 73eae8ce9e5b2efdd6bc3334641e6627ad30d9900d451dca35767b34563f3e12e8747863ea6779cc24adb33b62d3bec1cdf4d7e083a1f3da4c702d864de9bf4d
  • Loading branch information
codeofalltrades committed Jan 21, 2021
2 parents fc2233e + db1b07e commit fbd5fd4
Show file tree
Hide file tree
Showing 16 changed files with 154 additions and 10 deletions.
81 changes: 81 additions & 0 deletions src/crypto/randomx/aes_hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {

template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);

template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;

// initial state
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);

const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);

rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);

constexpr int PREFETCH_DISTANCE = 4096;
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
scratchpadEnd -= PREFETCH_DISTANCE;

for (int i = 0; i < 2; ++i) {
//process 64 bytes at a time in 4 lanes
while (scratchpadPtr < scratchpadEnd) {
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));

fill_state0 = aesdec<softAes>(fill_state0, key0);
fill_state1 = aesenc<softAes>(fill_state1, key1);
fill_state2 = aesdec<softAes>(fill_state2, key2);
fill_state3 = aesenc<softAes>(fill_state3, key3);

rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);

rx_prefetch_t0(prefetchPtr);

scratchpadPtr += 64;
prefetchPtr += 64;
}
prefetchPtr = (const char*) scratchpad;
scratchpadEnd += PREFETCH_DISTANCE;
}

rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);

//two extra rounds to achieve full diffusion
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);

hash_state0 = aesenc<softAes>(hash_state0, xkey0);
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
hash_state3 = aesdec<softAes>(hash_state3, xkey0);

hash_state0 = aesenc<softAes>(hash_state0, xkey1);
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
hash_state3 = aesdec<softAes>(hash_state3, xkey1);

//output hash
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
}

template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
3 changes: 3 additions & 0 deletions src/crypto/randomx/aes_hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);

template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);

template<bool softAes>
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
2 changes: 1 addition & 1 deletion src/crypto/randomx/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace randomx {
rx_aligned_free(ptr);
}

template class AlignedAllocator<CacheLineSize>;
template struct AlignedAllocator<CacheLineSize>;

void* LargePageAllocator::allocMemory(size_t count) {
return allocLargePagesMemory(count);
Expand Down
4 changes: 2 additions & 2 deletions src/crypto/randomx/argon2_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,13 @@ void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instanc

store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 0],
blockhash_bytes);

store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
rxa2_blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 1],
blockhash_bytes);
Expand Down
9 changes: 9 additions & 0 deletions src/crypto/randomx/blake2/blake2.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ extern "C" {
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
};

//randomx namespace
#define blake2b_init randomx_blake2b_init
#define blake2b_init_key randomx_blake2b_init_key
#define blake2b_init_param randomx_blake2b_init_param
#define blake2b_update randomx_blake2b_update
#define blake2b_final randomx_blake2b_final
#define blake2b randomx_blake2b
#define blake2b_long randomx_blake2b_long

/* Streaming API */
int blake2b_init(blake2b_state *S, size_t outlen);
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
Expand Down
2 changes: 1 addition & 1 deletion src/crypto/randomx/blake2/blake2b.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
}

/* Argon2 Team - Begin Code */
int rxa2_blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
uint8_t *out = (uint8_t *)pout;
blake2b_state blake_state;
uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
Expand Down
7 changes: 7 additions & 0 deletions src/crypto/randomx/intrin_portable.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)

#define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd
Expand Down Expand Up @@ -201,6 +202,7 @@ typedef union{
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)

/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
Expand Down Expand Up @@ -399,6 +401,10 @@ inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}

inline void rx_prefetch_t0(const void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}

FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
return vld1q_f64((const float64_t*)pd);
}
Expand Down Expand Up @@ -532,6 +538,7 @@ typedef union {
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
#define rx_prefetch_t0(x)

FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
rx_vec_f128 x;
Expand Down
2 changes: 1 addition & 1 deletion src/crypto/randomx/jit_compiler_a64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {

class Program;
class ProgramConfiguration;
struct ProgramConfiguration;
class SuperscalarProgram;
class Instruction;

Expand Down
1 change: 1 addition & 0 deletions src/crypto/randomx/jit_compiler_a64_static.S
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
# v30 -> E 'or' mask = 0x3*00000000******3*00000000******
# v31 -> scale mask = 0x81f000000000000081f0000000000000

.balign 4
randomx_program_aarch64:
# Save callee-saved registers
sub sp, sp, 192
Expand Down
2 changes: 1 addition & 1 deletion src/crypto/randomx/jit_compiler_fallback.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {

class Program;
class ProgramConfiguration;
struct ProgramConfiguration;
class SuperscalarProgram;

class JitCompilerFallback {
Expand Down
2 changes: 1 addition & 1 deletion src/crypto/randomx/jit_compiler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ namespace randomx {

void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}

Expand Down
2 changes: 1 addition & 1 deletion src/crypto/randomx/jit_compiler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace randomx {

class Program;
class ProgramConfiguration;
struct ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
class Instruction;
Expand Down
17 changes: 17 additions & 0 deletions src/crypto/randomx/randomx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,4 +507,21 @@ extern "C" {
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}

void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
machine->initScratchpad(machine->tempHash);
}

void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);

// Finish current hash and fill the scratchpad for the next hash at the same time
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
}
19 changes: 18 additions & 1 deletion src/crypto/randomx/randomx.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_H

#include <stddef.h>

#include <stdint.h>
#define RANDOMX_HASH_SIZE 32
#define RANDOMX_DATASET_ITEM_SIZE 64

Expand Down Expand Up @@ -238,6 +238,23 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
*/
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);

/**
* Paired functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first is called for the first input value.
* randomx_calculate_hash_next will output the hash value of the previous input.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param tempHash an array of 8 64-bit values used to store intermediate data between calls to randomx_calculate_hash_first and randomx_calculate_hash_next.
* @param input is a pointer to memory to be hashed. Must not be NULL.
* @param inputSize is the number of bytes to be hashed.
* @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
* @param nextInputSize is the number of bytes to be hashed for the next hash.
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);

#if defined(__cplusplus)
}
#endif
Expand Down
8 changes: 7 additions & 1 deletion src/crypto/randomx/virtual_machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,13 @@ namespace randomx {
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}

template<class Allocator, bool softAes>
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, &reg.a, fill_state);
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}

template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
}
Expand Down
3 changes: 3 additions & 0 deletions src/crypto/randomx/virtual_machine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class randomx_vm {
virtual ~randomx_vm() = 0;
virtual void allocate() = 0;
virtual void getFinalResult(void* out, size_t outSize) = 0;
virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0;
virtual void setDataset(randomx_dataset* dataset) { }
virtual void setCache(randomx_cache* cache) { }
virtual void initScratchpad(void* seed) = 0;
Expand Down Expand Up @@ -67,6 +68,7 @@ class randomx_vm {
uint64_t datasetOffset;
public:
std::string cacheKey;
alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
};

namespace randomx {
Expand All @@ -78,6 +80,7 @@ namespace randomx {
void allocate() override;
void initScratchpad(void* seed) override;
void getFinalResult(void* out, size_t outSize) override;
void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override;
protected:
void generateProgram(void* seed);
};
Expand Down

0 comments on commit fbd5fd4

Please sign in to comment.