From d864b10871cd4370fe574816b489c819c675ccc7 Mon Sep 17 00:00:00 2001
From: Udalov Max
Date: Mon, 22 Apr 2019 20:19:08 +0300
Subject: [PATCH] blake2s: use math.bits rotate functions instead of ad-hoc implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes code more readable and idiomatic.

Updates golang/go#31456

Benchstat:
name       old time/op   new time/op   delta
Write64-8    211ns ± 6%    205ns ± 2%     ~     (p=0.243 n=10+9)
Write1K-8   3.26µs ± 3%   3.12µs ± 2%   -4.44%  (p=0.000 n=9+9)
Sum64-8      227ns ± 5%    217ns ± 6%   -4.58%  (p=0.009 n=10+10)
Sum1K-8     3.28µs ± 2%   3.31µs ± 4%     ~     (p=0.412 n=10+9)

name       old speed     new speed     delta
Write64-8  303MB/s ± 6%  312MB/s ± 1%     ~     (p=0.203 n=10+8)
Write1K-8  314MB/s ± 3%  329MB/s ± 2%   +4.64%  (p=0.000 n=9+9)
Sum64-8    281MB/s ± 5%  295MB/s ± 5%   +4.93%  (p=0.009 n=10+10)
Sum1K-8    313MB/s ± 2%  310MB/s ± 4%     ~     (p=0.447 n=10+9)

Change-Id: Iee0e88f4405d4da1feacddaf24835e86d8ddeff7
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/173278
Reviewed-by: Brad Fitzpatrick
Run-TryBot: Brad Fitzpatrick
TryBot-Result: Gobot Gobot
---
 blake2s/blake2s_generic.go | 68 ++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/blake2s/blake2s_generic.go b/blake2s/blake2s_generic.go
index f7e065378a..24a1ff22ad 100644
--- a/blake2s/blake2s_generic.go
+++ b/blake2s/blake2s_generic.go
@@ -4,6 +4,10 @@
 
 package blake2s
 
+import (
+	"math/bits"
+)
+
 // the precomputed values for BLAKE2s
 // there are 10 16-byte arrays - one for each round
 // the entries are calculated from the sigma constants.
@@ -47,118 +51,118 @@ func hashBlocksGeneric(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) { v0 += m[s[0]] v0 += v4 v12 ^= v0 - v12 = v12<<(32-16) | v12>>16 + v12 = bits.RotateLeft32(v12, -16) v8 += v12 v4 ^= v8 - v4 = v4<<(32-12) | v4>>12 + v4 = bits.RotateLeft32(v4, -12) v1 += m[s[1]] v1 += v5 v13 ^= v1 - v13 = v13<<(32-16) | v13>>16 + v13 = bits.RotateLeft32(v13, -16) v9 += v13 v5 ^= v9 - v5 = v5<<(32-12) | v5>>12 + v5 = bits.RotateLeft32(v5, -12) v2 += m[s[2]] v2 += v6 v14 ^= v2 - v14 = v14<<(32-16) | v14>>16 + v14 = bits.RotateLeft32(v14, -16) v10 += v14 v6 ^= v10 - v6 = v6<<(32-12) | v6>>12 + v6 = bits.RotateLeft32(v6, -12) v3 += m[s[3]] v3 += v7 v15 ^= v3 - v15 = v15<<(32-16) | v15>>16 + v15 = bits.RotateLeft32(v15, -16) v11 += v15 v7 ^= v11 - v7 = v7<<(32-12) | v7>>12 + v7 = bits.RotateLeft32(v7, -12) v0 += m[s[4]] v0 += v4 v12 ^= v0 - v12 = v12<<(32-8) | v12>>8 + v12 = bits.RotateLeft32(v12, -8) v8 += v12 v4 ^= v8 - v4 = v4<<(32-7) | v4>>7 + v4 = bits.RotateLeft32(v4, -7) v1 += m[s[5]] v1 += v5 v13 ^= v1 - v13 = v13<<(32-8) | v13>>8 + v13 = bits.RotateLeft32(v13, -8) v9 += v13 v5 ^= v9 - v5 = v5<<(32-7) | v5>>7 + v5 = bits.RotateLeft32(v5, -7) v2 += m[s[6]] v2 += v6 v14 ^= v2 - v14 = v14<<(32-8) | v14>>8 + v14 = bits.RotateLeft32(v14, -8) v10 += v14 v6 ^= v10 - v6 = v6<<(32-7) | v6>>7 + v6 = bits.RotateLeft32(v6, -7) v3 += m[s[7]] v3 += v7 v15 ^= v3 - v15 = v15<<(32-8) | v15>>8 + v15 = bits.RotateLeft32(v15, -8) v11 += v15 v7 ^= v11 - v7 = v7<<(32-7) | v7>>7 + v7 = bits.RotateLeft32(v7, -7) v0 += m[s[8]] v0 += v5 v15 ^= v0 - v15 = v15<<(32-16) | v15>>16 + v15 = bits.RotateLeft32(v15, -16) v10 += v15 v5 ^= v10 - v5 = v5<<(32-12) | v5>>12 + v5 = bits.RotateLeft32(v5, -12) v1 += m[s[9]] v1 += v6 v12 ^= v1 - v12 = v12<<(32-16) | v12>>16 + v12 = bits.RotateLeft32(v12, -16) v11 += v12 v6 ^= v11 - v6 = v6<<(32-12) | v6>>12 + v6 = bits.RotateLeft32(v6, -12) v2 += m[s[10]] v2 += v7 v13 ^= v2 - v13 = v13<<(32-16) | v13>>16 + v13 = bits.RotateLeft32(v13, 
-16) v8 += v13 v7 ^= v8 - v7 = v7<<(32-12) | v7>>12 + v7 = bits.RotateLeft32(v7, -12) v3 += m[s[11]] v3 += v4 v14 ^= v3 - v14 = v14<<(32-16) | v14>>16 + v14 = bits.RotateLeft32(v14, -16) v9 += v14 v4 ^= v9 - v4 = v4<<(32-12) | v4>>12 + v4 = bits.RotateLeft32(v4, -12) v0 += m[s[12]] v0 += v5 v15 ^= v0 - v15 = v15<<(32-8) | v15>>8 + v15 = bits.RotateLeft32(v15, -8) v10 += v15 v5 ^= v10 - v5 = v5<<(32-7) | v5>>7 + v5 = bits.RotateLeft32(v5, -7) v1 += m[s[13]] v1 += v6 v12 ^= v1 - v12 = v12<<(32-8) | v12>>8 + v12 = bits.RotateLeft32(v12, -8) v11 += v12 v6 ^= v11 - v6 = v6<<(32-7) | v6>>7 + v6 = bits.RotateLeft32(v6, -7) v2 += m[s[14]] v2 += v7 v13 ^= v2 - v13 = v13<<(32-8) | v13>>8 + v13 = bits.RotateLeft32(v13, -8) v8 += v13 v7 ^= v8 - v7 = v7<<(32-7) | v7>>7 + v7 = bits.RotateLeft32(v7, -7) v3 += m[s[15]] v3 += v4 v14 ^= v3 - v14 = v14<<(32-8) | v14>>8 + v14 = bits.RotateLeft32(v14, -8) v9 += v14 v4 ^= v9 - v4 = v4<<(32-7) | v4>>7 + v4 = bits.RotateLeft32(v4, -7) } h[0] ^= v0 ^ v8