@@ -35,31 +35,34 @@ namespace Org.BouncyCastle.Crypto.Digests
35
35
36
36
internal static class Blake2b_X86
37
37
{
38
// Gate for the AVX2 code path of this class; Compress throws when it is false.
// After this change the property is internal and is true only when both project-level flags are
// true: Runtime.Intrinsics.X86.Avx2.IsEnabled and Runtime.Intrinsics.Vector.IsPackedLittleEndian.
// The removed line tested the framework's Avx2.IsSupported and BitConverter.IsLittleEndian directly.
// NOTE(review): what the two project flags check internally is not visible here - confirm against
// their definitions.
- public static bool IsSupported => Avx2 . IsSupported && BitConverter . IsLittleEndian ;
38
+ internal static bool IsSupported =>
39
+ Org . BouncyCastle . Runtime . Intrinsics . X86 . Avx2 . IsEnabled &&
40
+ Org . BouncyCastle . Runtime . Intrinsics . Vector . IsPackedLittleEndian ;
39
41
40
42
// Runs one compression step over the state in hashBuffer using 256-bit vector operations and
// writes the updated state back into hashBuffer.
//
// Parameters (new signature):
//   hashBuffer - state words; the first 64 bytes are read as two 256-bit rows and overwritten on exit.
//   blakeIV    - IV words; the first 64 bytes are read as two 256-bit rows (read-only).
//   t0, t1     - placed in lanes 0 and 1 of the vector XORed into the fourth row; they take the place
//                of totalSegmentsLow / totalSegmentsHigh from the removed signature.
//   f0         - placed in lane 2 of that vector; it replaces the value the removed code derived from
//                isFinal (ulong.MaxValue when final, otherwise 0), so the caller now supplies it.
//   message    - message block; at least 128 bytes are expected (debug assert below).
//
// Throws PlatformNotSupportedException when IsSupported is false.
// Note that the parameter order changed together with the visibility (public -> internal).
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
41
- public static void Compress ( bool isFinal , Span < ulong > hashBuffer , ReadOnlySpan < byte > message , ulong totalSegmentsLow , ulong totalSegmentsHigh , ReadOnlySpan < ulong > blakeIV )
43
+ internal static void Compress ( Span < ulong > hashBuffer , ReadOnlySpan < ulong > blakeIV , ulong t0 , ulong t1 , ulong f0 ,
44
+ ReadOnlySpan < byte > message )
42
45
{
43
46
// Fail fast: the vector code below must not run when the AVX2 path is unavailable.
if ( ! IsSupported )
44
47
throw new PlatformNotSupportedException ( nameof ( Blake2b_X86 ) ) ;
45
48
46
// Debug-only size checks. The removed assert required only 8 * sizeof(ulong) = 64 message bytes;
// the new one requires the full 128 bytes that Perform12Rounds reads (offsets 0..127).
- Debug . Assert ( message . Length >= Unsafe . SizeOf < ulong > ( ) * 8 ) ;
47
49
Debug . Assert ( hashBuffer . Length >= 8 ) ;
50
+ Debug . Assert ( blakeIV . Length >= 8 ) ;
51
+ Debug . Assert ( message . Length >= 128 ) ;
48
52
49
53
// View the ulong spans as bytes so whole 32-byte rows can be read and written.
var hashBytes = MemoryMarshal . AsBytes ( hashBuffer ) ;
50
54
var ivBytes = MemoryMarshal . AsBytes ( blakeIV ) ;
51
55
52
// Lanes of t_0: [t0, t1, f0, 0].
- var r_14 = isFinal ? ulong . MaxValue : 0 ;
53
- var t_0 = Vector256 . Create ( totalSegmentsLow , totalSegmentsHigh , r_14 , 0 ) ;
56
+ var t_0 = Vector256 . Create ( t0 , t1 , f0 , 0 ) ;
54
57
55
// Rows 1-2 come from the state (bytes 0..31 and 32..63), rows 3-4 from the IV.
// The removed LoadVector256 helper was a thin wrapper over the same MemoryMarshal.Read call.
- Vector256 < ulong > row1 = LoadVector256 < ulong > ( hashBytes ) ;
56
- Vector256 < ulong > row2 = LoadVector256 < ulong > ( hashBytes [ Vector256 < byte > . Count ..] ) ;
57
- Vector256 < ulong > row3 = LoadVector256 < ulong > ( ivBytes ) ;
58
- Vector256 < ulong > row4 = LoadVector256 < ulong > ( ivBytes [ Vector256 < byte > . Count ..] ) ;
58
+ var row1 = MemoryMarshal . Read < Vector256 < ulong > > ( hashBytes ) ;
59
+ var row2 = MemoryMarshal . Read < Vector256 < ulong > > ( hashBytes [ 32 ..] ) ;
60
+ var row3 = MemoryMarshal . Read < Vector256 < ulong > > ( ivBytes ) ;
61
+ var row4 = MemoryMarshal . Read < Vector256 < ulong > > ( ivBytes [ 32 ..] ) ;
59
62
// Mix t0 / t1 / f0 into the fourth row (second half of the IV).
row4 = Avx2 . Xor ( row4 , t_0 ) ;
60
63
61
// Keep the incoming state rows; they are XORed back in after the rounds.
- Vector256 < ulong > orig_1 = row1 ;
62
- Vector256 < ulong > orig_2 = row2 ;
64
+ var orig_1 = row1 ;
65
+ var orig_2 = row2 ;
63
66
64
67
Perform12Rounds ( message , ref row1 , ref row2 , ref row3 , ref row4 ) ;
65
68
// NOTE(review): the diff elides two lines of this method here (new-file lines 69-70); they are
// not visible in this view and are therefore not described.
@@ -68,21 +71,19 @@ public static void Compress(bool isFinal, Span<ulong> hashBuffer, ReadOnlySpan<b
68
71
// Combine the result with the state rows saved before the rounds.
row1 = Avx2 . Xor ( row1 , orig_1 ) ;
69
72
row2 = Avx2 . Xor ( row2 , orig_2 ) ;
70
73
71
// Write both rows back over the first 64 bytes of hashBuffer (hashBytes aliases it).
- Store ( row1 , hashBytes ) ;
72
- Store ( row2 , hashBytes [ Vector256 < byte > . Count .. ] ) ;
74
+ MemoryMarshal . Write ( hashBytes , ref row1 ) ;
75
+ MemoryMarshal . Write ( hashBytes [ 32 .. ] , ref row2 ) ;
73
76
}
74
77
75
78
// Applies the round sequence to the four state rows, which are updated in place through the refs.
// m is the message block; the visible code reads it as eight 16-byte chunks at offsets
// 0, 16, ..., 112, so at least 128 bytes are required. The debug assert for that was removed from
// this method; Compress asserts the same bound before calling.
// NOTE(review): only the start of round 1 and the final Round call are visible in this diff; the
// message schedule of the elided rounds is not documented here.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
76
79
private static void Perform12Rounds ( ReadOnlySpan < byte > m , ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 , ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 )
77
80
{
78
- Debug . Assert ( m . Length >= 128 ) ;
79
-
80
- #region Rounds
81
+ #region Rounds
81
82
//ROUND 1
// m0..m3: message bytes 0..63, one 16-byte chunk each, duplicated into both 128-bit halves of a
// 256-bit vector. The literal offsets replace the removed Unsafe.SizeOf<Vector128<ulong>>() * k
// expressions (16 * k).
82
- var m0 = BroadcastVector128ToVector256 < ulong > ( m ) ;
83
- var m1 = BroadcastVector128ToVector256 < ulong > ( m [ Unsafe . SizeOf < Vector128 < ulong > > ( ) ..] ) ;
84
- var m2 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 2 ) ..] ) ;
85
- var m3 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 3 ) ..] ) ;
83
+ var m0 = Broadcast128ToVector256 < ulong > ( m ) ;
84
+ var m1 = Broadcast128ToVector256 < ulong > ( m [ 16 ..] ) ;
85
+ var m2 = Broadcast128ToVector256 < ulong > ( m [ 32 ..] ) ;
86
+ var m3 = Broadcast128ToVector256 < ulong > ( m [ 48 ..] ) ;
86
87
87
88
// t0 / t1 are scratch vectors that are reused for every message vector built below.
var t0 = Avx2 . UnpackLow ( m0 , m1 ) ;
88
89
var t1 = Avx2 . UnpackLow ( m2 , m3 ) ;
// (lines elided by the diff between these hunks)
@@ -92,10 +93,10 @@ private static void Perform12Rounds(ReadOnlySpan<byte> m, ref Vector256<ulong> r
92
93
t1 = Avx2 . UnpackHigh ( m2 , m3 ) ;
93
94
// Blend on 32-bit lanes with mask 0b1111_0000: low four lanes (lower 128 bits) come from t0,
// high four lanes (upper 128 bits) come from t1.
var b2 = Avx2 . Blend ( t0 . AsUInt32 ( ) , t1 . AsUInt32 ( ) , 0b_1111_0000 ) . AsUInt64 ( ) ;
94
95
95
// m4..m7: message bytes 64..127, loaded the same way as m0..m3.
- var m4 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 4 ) ..] ) ;
96
- var m5 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 5 ) ..] ) ;
97
- var m6 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 6 ) ..] ) ;
98
- var m7 = BroadcastVector128ToVector256 < ulong > ( m [ ( Unsafe . SizeOf < Vector128 < ulong > > ( ) * 7 ) ..] ) ;
96
+ var m4 = Broadcast128ToVector256 < ulong > ( m [ 64 ..] ) ;
97
+ var m5 = Broadcast128ToVector256 < ulong > ( m [ 80 ..] ) ;
98
+ var m6 = Broadcast128ToVector256 < ulong > ( m [ 96 ..] ) ;
99
+ var m7 = Broadcast128ToVector256 < ulong > ( m [ 112 ..] ) ;
99
100
100
101
t0 = Avx2 . UnpackLow ( m7 , m4 ) ;
101
102
t1 = Avx2 . UnpackLow ( m5 , m6 ) ;
// (remaining rounds elided by the diff; the lines below are the tail of the last visible round)
@@ -315,14 +316,18 @@ private static void Perform12Rounds(ReadOnlySpan<byte> m, ref Vector256<ulong> r
315
316
b4 = Avx2 . Blend ( t0 . AsUInt32 ( ) , t1 . AsUInt32 ( ) , 0b_1111_0000 ) . AsUInt64 ( ) ;
316
317
317
318
// Each round hands the four message vectors b1..b4 prepared above to Round.
Round ( ref row1 , ref row2 , ref row3 , ref row4 , b1 , b2 , b3 , b4 ) ;
318
- #endregion
319
+ #endregion
319
320
}
320
321
321
322
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
322
- private static void Round ( ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 , ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 , Vector256 < ulong > b1 , Vector256 < ulong > b2 , Vector256 < ulong > b3 , Vector256 < ulong > b4 )
323
+ private static void Round ( ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 , ref Vector256 < ulong > row3 ,
324
+ ref Vector256 < ulong > row4 , Vector256 < ulong > b1 , Vector256 < ulong > b2 , Vector256 < ulong > b3 ,
325
+ Vector256 < ulong > b4 )
323
326
{
324
- Vector256 < byte > r24 = Vector256 . Create ( ( byte ) 3 , 4 , 5 , 6 , 7 , 0 , 1 , 2 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 10 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 2 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 10 ) ;
325
- Vector256 < byte > r16 = Vector256 . Create ( ( byte ) 2 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 10 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 2 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 10 , 11 , 12 , 13 , 14 , 15 , 8 , 9 ) ;
327
+ Vector256 < byte > r24 = Vector256 . Create (
328
+ ( byte ) 3 , 4 , 5 , 6 , 7 , 0 , 1 , 2 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 10 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 2 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 10 ) ;
329
+ Vector256 < byte > r16 = Vector256 . Create (
330
+ ( byte ) 2 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 10 , 11 , 12 , 13 , 14 , 15 , 8 , 9 , 2 , 3 , 4 , 5 , 6 , 7 , 0 , 1 , 10 , 11 , 12 , 13 , 14 , 15 , 8 , 9 ) ;
326
331
327
332
G1 ( r24 , ref row1 , ref row2 , ref row3 , ref row4 , b1 ) ;
328
333
G2 ( r16 , ref row1 , ref row2 , ref row3 , ref row4 , b2 ) ;
@@ -352,7 +357,8 @@ private static void Diagonalize(ref Vector256<ulong> row1, ref Vector256<ulong>
352
357
}
353
358
354
359
// Mixing step invoked from Round with the r24 mask and message vector b1 (received here as b0).
// Rows are passed by reference and updated in place. The signature change in this diff only
// wraps the parameter list across two lines.
// NOTE(review): r24 is the byte-index vector built in Round; its use is in the part of the body
// that the diff elides, so only the first two statements are described.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
355
- private static void G1 ( Vector256 < byte > r24 , ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 , ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 , Vector256 < ulong > b0 )
360
+ private static void G1 ( Vector256 < byte > r24 , ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 ,
361
+ ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 , Vector256 < ulong > b0 )
356
362
{
357
363
// row1 = row1 + b0 + row2 (lane-wise 64-bit addition).
row1 = Avx2 . Add ( Avx2 . Add ( row1 , b0 ) , row2 ) ;
358
364
// row4 = row4 ^ row1.
row4 = Avx2 . Xor ( row4 , row1 ) ;
// (remaining statements elided by the diff)
@@ -364,7 +370,8 @@ private static void G1(Vector256<byte> r24, ref Vector256<ulong> row1, ref Vecto
364
370
}
365
371
366
372
// Mixing step invoked from Round with the r16 mask and message vector b2 (received here as b0).
// Rows are passed by reference and updated in place. The signature change in this diff only
// wraps the parameter list across two lines.
// NOTE(review): r16 is the byte-index vector built in Round; its use is in the part of the body
// that the diff elides, so only the first two statements are described.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
367
- private static void G2 ( Vector256 < byte > r16 , ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 , ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 , Vector256 < ulong > b0 )
373
+ private static void G2 ( Vector256 < byte > r16 , ref Vector256 < ulong > row1 , ref Vector256 < ulong > row2 ,
374
+ ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 , Vector256 < ulong > b0 )
368
375
{
369
376
// row1 = row1 + b0 + row2 (lane-wise 64-bit addition).
row1 = Avx2 . Add ( Avx2 . Add ( row1 , b0 ) , row2 ) ;
370
377
// row4 = row4 ^ row1.
row4 = Avx2 . Xor ( row4 , row1 ) ;
// (remaining statements elided by the diff)
@@ -376,7 +383,8 @@ private static void G2(Vector256<byte> r16, ref Vector256<ulong> row1, ref Vecto
376
383
}
377
384
378
385
// Takes row1, row3 and row4 by reference; row2 is not a parameter. The signature change in this
// diff only wraps the parameter list across two lines.
// NOTE(review): the statements of this method are elided by the diff. Only the first two lines of
// the layout diagram are visible; they show lane order 3,0,1,2 on the left and 0,1,2,3 on the
// right - presumably before/after, confirm against the full source.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
379
- private static void Undiagonalize ( ref Vector256 < ulong > row1 , ref Vector256 < ulong > row3 , ref Vector256 < ulong > row4 )
386
+ private static void Undiagonalize ( ref Vector256 < ulong > row1 , ref Vector256 < ulong > row3 ,
387
+ ref Vector256 < ulong > row4 )
380
388
{
381
389
// +-------------------+ +-------------------+
382
390
// | 3 | 0 | 1 | 2 | | 0 | 1 | 2 | 3 |
// (rest of the diagram and the method body elided by the diff)
@@ -392,28 +400,12 @@ private static void Undiagonalize(ref Vector256<ulong> row1, ref Vector256<ulong
392
400
}
393
401
394
402
// Reads the first 16 bytes of source as a Vector128<T> and returns a Vector256<T> whose lower and
// upper 128-bit halves both hold that value.
// Renamed in this diff from BroadcastVector128ToVector256; the debug-only length assert was dropped.
// MemoryMarshal.Read performs its own length check and throws when source is shorter than the
// 16 bytes of a Vector128<T>.
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
395
- private static Vector256 < T > BroadcastVector128ToVector256 < T > ( ReadOnlySpan < byte > source ) where T : struct
403
+ private static Vector256 < T > Broadcast128ToVector256 < T > ( ReadOnlySpan < byte > source ) where T : struct
396
404
{
397
- Debug . Assert ( source . Length >= Unsafe . SizeOf < Vector128 < byte > > ( ) ) ;
398
-
399
405
var vector = MemoryMarshal . Read < Vector128 < T > > ( source ) ;
400
406
// ToVector256Unsafe leaves the upper 128 bits unspecified; WithUpper below fills them in.
Vector256 < T > result = vector . ToVector256Unsafe ( ) ;
401
407
return result . WithUpper ( vector ) ;
402
408
}
403
-
404
- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
405
- private static Vector256 < T > LoadVector256 < T > ( ReadOnlySpan < byte > source ) where T : struct
406
- {
407
- Debug . Assert ( source . Length >= Unsafe . SizeOf < Vector256 < byte > > ( ) ) ;
408
- return MemoryMarshal . Read < Vector256 < T > > ( source ) ;
409
- }
410
-
411
- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
412
- private static void Store < T > ( Vector256 < T > vector , Span < byte > destination ) where T : struct
413
- {
414
- Debug . Assert ( destination . Length >= Unsafe . SizeOf < Vector256 < byte > > ( ) ) ;
415
- MemoryMarshal . Write ( destination , ref vector ) ;
416
- }
417
409
}
418
410
}
419
411
#endif
0 commit comments