Skip to content

Commit

Permalink
content and block checksum (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
MiloszKrajewski authored Nov 9, 2022
1 parent e5c1631 commit 432f3d6
Show file tree
Hide file tree
Showing 27 changed files with 368 additions and 183 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 1.3.4-beta (2022/11/09)
* ADDED: block and content checksum support

## 1.3.3-beta (2022/11/06)
* FIXED: (hopefully) System.Runtime.CompilerServices.Unsafe versioning issues

Expand Down
9 changes: 6 additions & 3 deletions Common.targets
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
<Project>
<PropertyGroup>
<Version>1.3.3-beta</Version>
<AssemblyVersion>1.3.3</AssemblyVersion>
<FileVersion>1.3.3</FileVersion>
<Version>1.3.4-beta</Version>
<AssemblyVersion>1.3.4</AssemblyVersion>
<FileVersion>1.3.4</FileVersion>
</PropertyGroup>
<PropertyGroup>
<CheckEolTargetFramework>false</CheckEolTargetFramework>
</PropertyGroup>
<PropertyGroup>
<Product>K4os.Compression.LZ4</Product>
Expand Down
2 changes: 1 addition & 1 deletion paket.dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ storage none
framework auto-detect
source https://www.nuget.org/api/v2

nuget K4os.Hash.xxHash
nuget lz4net
nuget SharpZipLib
nuget BenchmarkDotNet
Expand All @@ -14,6 +13,7 @@ nuget Microsoft.NET.Test.Sdk
# paket is not great with conditional dependencies
# nuget is not great either, but has a little bit more support for conditional sections

# nuget K4os.Hash.xxHash
# nuget System.Memory framework: net462,netstandard2.0
# nuget System.Runtime.CompilerServices.Unsafe framework: net462,netstandard2.0,netstandard2.1
# nuget System.IO.Pipelines framework: net5.0,net6.0
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net5.0</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<OutputType>Exe</OutputType>
</PropertyGroup>
<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/K4os.Compression.LZ4.Roundtrip/paket.references
Original file line number Diff line number Diff line change
@@ -1 +1 @@
K4os.Hash.xxHash

60 changes: 60 additions & 0 deletions src/K4os.Compression.LZ4.Streams.Test/ChecksumTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
using System;
using K4os.Compression.LZ4.Streams.Test.Internal;
using TestHelpers;
using Xunit;

namespace K4os.Compression.LZ4.Streams.Test;

/// <summary>
/// Tests covering block and content checksum handling: frames written by the
/// tested encoder are decoded by the reference tool, frames written by the
/// reference tool are decoded by the tested decoder, and frames are round-tripped
/// through the tested encoder and decoder.
/// </summary>
public class ChecksumTests
{
    // Chunk size handed to the tested encoder/decoder in every test case.
    private const int ChunkSize = 1337;

    // Builds chained-block settings with the requested checksum flags.
    private static LZ4Settings ChainedSettings(bool block, bool content) =>
        new LZ4Settings
        {
            Chaining = true,
            BlockChecksum = block,
            ContentChecksum = content,
        };

    /// <summary>
    /// Encodes with the tested encoder using the given checksum flags, decodes
    /// with the reference tool, and checks the result matches the original file.
    /// </summary>
    [Theory]
    [InlineData(".corpus/mozilla", true, false)]
    [InlineData(".corpus/mozilla", false, true)]
    [InlineData(".corpus/mozilla", true, true)]
    public void ChecksumIsProduced(string filename, bool block, bool content)
    {
        var original = Tools.FindFile(filename);

        using var encoded = TempFile.Create();
        using var restored = TempFile.Create();

        var settings = ChainedSettings(block, content);

        TestedLZ4.Encode(original, encoded.FileName, ChunkSize, settings);
        ReferenceLZ4.Decode(encoded.FileName, restored.FileName);

        Tools.SameFiles(original, restored.FileName);
    }

    /// <summary>
    /// Encodes with the reference tool using the given command line options,
    /// decodes with the tested decoder, and checks the result matches the
    /// original file.
    /// </summary>
    [Theory]
    [InlineData(".corpus/mozilla", "-B4 -BD -BX")]
    [InlineData(".corpus/mozilla", "-B4 -BD -BX --no-frame-crc")]
    [InlineData(".corpus/mozilla", "-B4 -BD --no-frame-crc")]
    public void ChecksumIsVerified(string filename, string options)
    {
        var original = Tools.FindFile(filename);

        using var encoded = TempFile.Create();
        using var restored = TempFile.Create();

        ReferenceLZ4.Encode(options, original, encoded.FileName);
        TestedLZ4.Decode(encoded.FileName, restored.FileName, ChunkSize);

        Tools.SameFiles(original, restored.FileName);
    }

    /// <summary>
    /// Encodes and decodes with the tested implementation only, using the given
    /// checksum flags, and checks the result matches the original file.
    /// </summary>
    [Theory]
    [InlineData(".corpus/mozilla", true, false)]
    [InlineData(".corpus/mozilla", false, true)]
    [InlineData(".corpus/mozilla", true, true)]
    public void ChecksumIsVerifiedRoundtrip(string filename, bool block, bool content)
    {
        var original = Tools.FindFile(filename);

        using var encoded = TempFile.Create();
        using var restored = TempFile.Create();

        var settings = ChainedSettings(block, content);

        TestedLZ4.Encode(original, encoded.FileName, ChunkSize, settings);
        TestedLZ4.Decode(encoded.FileName, restored.FileName, ChunkSize);

        Tools.SameFiles(original, restored.FileName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public static void Decode(string input, string output)
throw new InvalidOperationException("Cannot start LZ4.exe");

process.WaitForExit();
if (process.ExitCode != 0)
throw new InvalidOperationException("LZ4.exe reported an error");
}
}
}
10 changes: 8 additions & 2 deletions src/K4os.Compression.LZ4.Streams.Test/Internal/Settings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ public class Settings
{
public static LZ4Settings ParseSettings(string options)
{
var result = new LZ4Settings { Chaining = false };
var result = new LZ4Settings {
Chaining = false,
ContentChecksum = true,
};

foreach (var option in options.Split(' '))
{
Expand All @@ -29,7 +32,10 @@ public static LZ4Settings ParseSettings(string options)
result.Chaining = true;
break;
case "-BX":
// ignored to be implemented
result.BlockChecksum = true;
break;
case "--no-frame-crc":
result.ContentChecksum = false;
break;
case "-B4":
result.BlockSize = Mem.K64;
Expand Down
5 changes: 4 additions & 1 deletion src/K4os.Compression.LZ4.Streams.Test/Internal/TestedLZ4.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public class LZ4Settings
public int BlockSize { get; set; } = Mem.K64;
public int ExtraBlocks { get; set; } = 0;
public bool Chaining { get; set; } = true;
public bool BlockChecksum { get; set; } = false;
public bool ContentChecksum { get; set; } = false;
}

public class TestedLZ4
Expand All @@ -36,7 +38,8 @@ public static void Encode(
string original, string encoded, int chuckSize, LZ4Settings settings)
{
var frameInfo = new LZ4Descriptor(
null, false, settings.Chaining, false, null, settings.BlockSize);
null, settings.ContentChecksum, settings.Chaining, settings.BlockChecksum,
null, settings.BlockSize);
using var input = File.OpenRead(original);
using var output = File.Create(encoded);
using var encode = new LZ4EncoderStream(
Expand Down
12 changes: 9 additions & 3 deletions src/K4os.Compression.LZ4.Streams/Frames/LZ4FrameReader.async.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ private async Task<bool> ReadHeader(Token token)
if (hasDictionary)
throw NotImplemented(
"Predefined dictionaries feature is not implemented"); // Peek4(dictionaryId);

if (contentChecksum)
InitializeContentChecksum();

// ReSharper disable once ExpressionIsAlwaysNull
_descriptor = new LZ4Descriptor(
Expand All @@ -113,7 +116,8 @@ private async Task<int> ReadBlock(Token token)
if (blockLength == 0)
{
if (_descriptor.ContentChecksum)
_ = await Peek4(token).Weave();
VerifyContentChecksum(await Peek4(token).Weave());

CloseFrame();
return 0;
}
Expand All @@ -124,9 +128,11 @@ private async Task<int> ReadBlock(Token token)
await ReadData(token, blockLength).Weave();

if (_descriptor.BlockChecksum)
_ = await Peek4(token).Weave();
VerifyBlockChecksum(await Peek4(token).Weave(), blockLength);

return InjectOrDecode(blockLength, uncompressed);
var read = InjectOrDecode(blockLength, uncompressed);
UpdateContentChecksum(read);
return read;
}

private async Task<long?> GetFrameLength(Token token)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ public partial class LZ4FrameReader<TStreamReader, TStreamState>
if (hasDictionary)
throw NotImplemented(
"Predefined dictionaries feature is not implemented"); // Peek4(dictionaryId);

if (contentChecksum)
InitializeContentChecksum();

// ReSharper disable once ExpressionIsAlwaysNull
_descriptor = new LZ4Descriptor(
Expand All @@ -120,7 +123,8 @@ public partial class LZ4FrameReader<TStreamReader, TStreamState>
if (blockLength == 0)
{
if (_descriptor.ContentChecksum)
_ = /*await*/ Peek4(token);
VerifyContentChecksum(/*await*/ Peek4(token));

CloseFrame();
return 0;
}
Expand All @@ -131,9 +135,11 @@ public partial class LZ4FrameReader<TStreamReader, TStreamState>
/*await*/ ReadData(token, blockLength);

if (_descriptor.BlockChecksum)
_ = /*await*/ Peek4(token);
VerifyBlockChecksum(/*await*/ Peek4(token), blockLength);

return InjectOrDecode(blockLength, uncompressed);
var read = InjectOrDecode(blockLength, uncompressed);
UpdateContentChecksum(read);
return read;
}

private /*async*/ long? GetFrameLength(Token token)
Expand Down
49 changes: 37 additions & 12 deletions src/K4os.Compression.LZ4.Streams/Frames/LZ4FrameReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using K4os.Compression.LZ4.Internal;
using K4os.Compression.LZ4.Streams.Abstractions;
using K4os.Compression.LZ4.Streams.Internal;
using K4os.Hash.xxHash;

namespace K4os.Compression.LZ4.Streams.Frames;

Expand All @@ -25,6 +26,9 @@ public partial class LZ4FrameReader<TStreamReader, TStreamState>:

private ILZ4Descriptor _descriptor;
private ILZ4Decoder _decoder;

private XXH32.State _contentChecksum;


private byte[] _buffer;
private int _decoded;
Expand Down Expand Up @@ -112,6 +116,27 @@ private bool Drain(Span<byte> buffer, ref int offset, ref int count, ref int rea
return false;
}

/// <summary>
/// Computes the XXH32 digest of the first <paramref name="blockLength"/> bytes
/// of the block buffer and compares it with the checksum read from the stream.
/// </summary>
/// <param name="expected">Checksum value read from the stream.</param>
/// <param name="blockLength">Number of bytes in the buffer to hash.</param>
/// <exception cref="InvalidDataException">Thrown when the digests differ.</exception>
private void VerifyBlockChecksum(uint expected, int blockLength)
{
    if (XXH32.DigestOf(_buffer, 0, blockLength) == expected)
        return;

    throw InvalidChecksum("block");
}

/// <summary>
/// Resets the running content checksum state so a new digest can be accumulated.
/// </summary>
private void InitializeContentChecksum()
{
    XXH32.Reset(ref _contentChecksum);
}

/// <summary>
/// Feeds the most recently decoded bytes into the running content checksum.
/// Does nothing when the current frame does not carry a content checksum.
/// </summary>
/// <param name="read">Number of bytes produced by the last decode step.</param>
private unsafe void UpdateContentChecksum(int read)
{
    // The reader calls this after every block; without this guard every decoded
    // byte would be hashed (on a never-reset state) even though the digest is
    // never verified. The writer side guards on the same descriptor flag.
    if (!_descriptor.ContentChecksum)
        return;

    // NOTE(review): Peek(-read) is presumed to point at the last `read` bytes
    // of decoder output - confirm against the ILZ4Decoder contract.
    var span = new Span<byte>(_decoder.Peek(-read), read);
    XXH32.Update(ref _contentChecksum, span);
}

/// <summary>
/// Finalizes the running content checksum and compares it with the value read
/// from the stream.
/// </summary>
/// <param name="expected">Checksum value read from the stream.</param>
/// <exception cref="InvalidDataException">Thrown when the digests differ.</exception>
private void VerifyContentChecksum(uint expected)
{
    if (XXH32.Digest(in _contentChecksum) == expected)
        return;

    throw InvalidChecksum("content");
}

/// <inheritdoc />
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool OpenFrame() =>
Expand Down Expand Up @@ -168,6 +193,9 @@ private static InvalidDataException MagicNumberExpected() =>

private static InvalidDataException UnknownFrameVersion(int version) =>
new($"LZ4 frame version {version} is not supported");

/// <summary>
/// Creates the exception reported when a checksum does not match.
/// </summary>
/// <param name="type">Kind of checksum that failed; inserted into the message.</param>
/// <returns>The exception for the caller to throw.</returns>
private static InvalidDataException InvalidChecksum(string type)
{
    return new InvalidDataException($"Invalid {type} checksum");
}

/// <summary>
/// Disposes the decoder. Consecutive attempts to read will fail.
Expand All @@ -192,22 +220,19 @@ protected virtual void Dispose(bool disposing)
/// <summary>
/// Releases unmanaged resources.
/// </summary>
protected virtual void ReleaseResources() { }
protected virtual void ReleaseResources() { }

/// <summary>
/// Releases unmanaged resources.
/// </summary>
/// <returns>Task indicating operation is finished.</returns>
protected virtual Task ReleaseResourcesAsync() => Task.CompletedTask;

/// <inheritdoc />
public void Dispose()
{
Dispose(true);
}

public void Dispose() { Dispose(true); }

#if NETSTANDARD2_1_OR_GREATER || NET5_0_OR_GREATER

/// <inheritdoc />
public virtual async ValueTask DisposeAsync()
{
Expand All @@ -221,9 +246,9 @@ public virtual async ValueTask DisposeAsync()
await ReleaseResourcesAsync();
}
}

#endif

// ReSharper disable once UnusedParameter.Local
private int ReadMeta(EmptyToken _, int length, bool optional = false)
{
Expand All @@ -244,7 +269,7 @@ private async Task<int> ReadMeta(CancellationToken token, int length, bool optio
}

// ReSharper disable once UnusedParameter.Local
private void ReadData(EmptyToken _, int length) =>
private void ReadData(EmptyToken _, int length) =>
_reader.TryReadBlock(ref _stream, _buffer, 0, length, false);

private async Task ReadData(CancellationToken token, int length) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
using ReadableBuffer = System.ReadOnlyMemory<byte>;
using Token = System.Threading.CancellationToken;
#endif
using System;
using K4os.Compression.LZ4.Streams.Internal;

namespace K4os.Compression.LZ4.Streams.Frames;
Expand All @@ -33,6 +32,9 @@ private async Task WriteManyBytes(Token token, ReadableBuffer buffer)
{
if (TryStashFrame())
await FlushMeta(token).Weave();

if (_descriptor.ContentChecksum)
UpdateContentChecksum(buffer.ToSpan());

var offset = 0;
var count = buffer.Length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
using ReadableBuffer = System.ReadOnlyMemory<byte>;
using Token = System.Threading.CancellationToken;
#endif
using System;
using K4os.Compression.LZ4.Streams.Internal;

namespace K4os.Compression.LZ4.Streams.Frames;
Expand All @@ -40,6 +39,9 @@ private void WriteOneByte(Token token, byte value) =>
{
if (TryStashFrame())
/*await*/ FlushMeta(token);

if (_descriptor.ContentChecksum)
UpdateContentChecksum(buffer.ToSpan());

var offset = 0;
var count = buffer.Length;
Expand Down
Loading

0 comments on commit 432f3d6

Please sign in to comment.