Skip to content

Commit

Permalink
Implementation of streaming compression
Browse files Browse the repository at this point in the history
  • Loading branch information
AlgorithmsAreCool authored and dscheg committed Sep 18, 2020
1 parent f9b360b commit 963af94
Show file tree
Hide file tree
Showing 12 changed files with 524 additions and 17 deletions.
1 change: 1 addition & 0 deletions ZstdNet.Benchmarks/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
BenchmarkDotNet.Artifacts
30 changes: 30 additions & 0 deletions ZstdNet.Benchmarks/CompressionBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using System;
using BenchmarkDotNet.Attributes;

namespace ZstdNet.Benchmarks
{
[MemoryDiagnoser]
public class CompressionOverheadBenchmarks
{
private const int TestSize = 1024;

private readonly byte[] UncompressedData = new byte[TestSize];
private readonly byte[] CompressedData;

private readonly byte[] Buffer = new byte[Compressor.GetCompressBound(TestSize)];

private readonly Compressor Compressor = new Compressor(new CompressionOptions(1));
private readonly Decompressor Decompressor = new Decompressor();

public CompressionOverheadBenchmarks()
{
var r = new Random(0);
r.NextBytes(UncompressedData);

CompressedData = Compressor.Wrap(UncompressedData);
}

[Benchmark] public void Compress1KBRandom() => Compressor.Wrap(UncompressedData, Buffer, 0);
[Benchmark] public void Decompress1KBRandom() => Decompressor.Unwrap(CompressedData, Buffer, 0);
}
}
12 changes: 12 additions & 0 deletions ZstdNet.Benchmarks/Main.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using BenchmarkDotNet.Running;

namespace ZstdNet.Benchmarks
{
class Program
{
static void Main()
{
BenchmarkRunner.Run<CompressionOverheadBenchmarks>();
}
}
}
16 changes: 16 additions & 0 deletions ZstdNet.Benchmarks/ZstdNet.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.0</TargetFramework>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.10.11" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\ZstdNet\ZstdNet.csproj" />
</ItemGroup>

</Project>
138 changes: 138 additions & 0 deletions ZstdNet.Tests/SteamingCompressionTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;
using System.IO;
using NUnit.Framework;

namespace ZstdNet.Tests
{
public enum DataFill
{
Random,
Sequential
}

internal static class DataGenerator
{
private static readonly Random Random = new Random(1234);

public const int LargeBufferSize = 1 * 1024 * 1024;
public const int SmallBufferSize = 1 * 1024;

public static MemoryStream GetSmallStream(DataFill dataFill) => new MemoryStream(GetBuffer(SmallBufferSize, dataFill));
public static MemoryStream GetLargeStream(DataFill dataFill) => new MemoryStream(GetBuffer(LargeBufferSize, dataFill));
public static MemoryStream GetStream(int length, DataFill dataFill) => new MemoryStream(GetBuffer(length, dataFill));

public static byte[] GetSmallBuffer(DataFill dataFill) => GetBuffer(SmallBufferSize, dataFill);
public static byte[] GetLargeBuffer(DataFill dataFill) => GetBuffer(LargeBufferSize, dataFill);

public static byte[] GetBuffer(int length, DataFill dataFill)
{
var buffer = new byte[length];
if(dataFill == DataFill.Random)
Random.NextBytes(buffer);
else
{
for(int i = 0; i < buffer.Length; i++)
buffer[i] = (byte)(i % 256);
}

return buffer;
}
}

[TestFixture]
public class SteamingTests
{
[Test]
public void CompressionImprovesWithDictionary()
{
var trainingData = new byte[100][];
for(int i = 0; i < trainingData.Length; i++)
trainingData[i] = DataGenerator.GetSmallBuffer(DataFill.Random);

var dict = DictBuilder.TrainFromBuffer(trainingData);
var compressionOptions = new CompressionOptions(dict);

var testStream = DataGenerator.GetSmallStream(DataFill.Random);

var normalResultStream = new MemoryStream();
using(var compressionStream = new CompressorStream(normalResultStream))
testStream.CopyTo(compressionStream);

var dictResultStream = new MemoryStream();
using(var compressionStream = new CompressorStream(dictResultStream, compressionOptions))
testStream.CopyTo(compressionStream);

Assert.Greater(normalResultStream.Length, dictResultStream.Length);
}

[Test]
public void CompressionShrinksData()
{
var inStream = DataGenerator.GetLargeStream(DataFill.Sequential);

var outStream = new MemoryStream();
using(var compressionStream = new CompressorStream(outStream))
inStream.CopyTo(compressionStream);

Assert.Greater(inStream.Length, outStream.Length);
}

[Test]
public void RoundTrip_BatchToStreaming()
{
var testBuffer = DataGenerator.GetLargeBuffer(DataFill.Sequential);

byte[] compressedBuffer;
using(var compressor = new Compressor())
compressedBuffer = compressor.Wrap(testBuffer);

var resultStream = new MemoryStream();
using(var decompressionStream = new DecompressorStream(new MemoryStream(compressedBuffer)))
decompressionStream.CopyTo(resultStream);

Validate(testBuffer, resultStream.ToArray());
}

[Test]
public void RoundTrip_StreamingToBatch()
{
var testStream = DataGenerator.GetLargeStream(DataFill.Sequential);

var tempStream = new MemoryStream();
using(var compressorStream = new CompressorStream(testStream))
tempStream.CopyTo(compressorStream);

byte[] resultBuffer;
using(var decompressor = new Decompressor())
resultBuffer = decompressor.Unwrap(tempStream.ToArray());

Validate(tempStream.ToArray(), resultBuffer);
}

[Test]
public void RoundTrip_StreamingStreaming()
{
var testStream = DataGenerator.GetLargeStream(DataFill.Sequential);

var tempStream = new MemoryStream();
using(var compressionStream = new CompressorStream(tempStream))
testStream.CopyTo(compressionStream);

tempStream.Position = 0;

var resultStream = new MemoryStream();
using(var decompressionStream = new DecompressorStream(tempStream))
decompressionStream.CopyTo(resultStream);

Validate(testStream.ToArray(), resultStream.ToArray());
}

private static void Validate(byte[] expected, byte[] actual)
{
Assert.AreEqual(expected.Length, actual.Length, "Decompressed Stream length is different than input stream");

for(int i = 0; i < expected.Length; i++)
Assert.AreEqual(expected[i], actual[i], $"Decompressed byte index {i} is different than input stream");
}
}
}
34 changes: 24 additions & 10 deletions ZstdNet.sln
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26730.3
VisualStudioVersion = 15.0.27130.2010
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZstdNet", "ZstdNet\ZstdNet.csproj", "{8ADBEB19-A508-471D-87A0-7443EE086E9B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ZstdNet.Tests", "ZstdNet.Tests\ZstdNet.Tests.csproj", "{8DD3694E-9532-4659-AA9F-BB01CBB9341B}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZstdNet.Tests", "ZstdNet.Tests\ZstdNet.Tests.csproj", "{8DD3694E-9532-4659-AA9F-BB01CBB9341B}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ZstdNet.Benchmarks", "ZstdNet.Benchmarks\ZstdNet.Benchmarks.csproj", "{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -31,16 +33,28 @@ Global
{8ADBEB19-A508-471D-87A0-7443EE086E9B}.Release|x86.Build.0 = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x64.ActiveCfg = Debug|x64
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x64.Build.0 = Debug|x64
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x86.ActiveCfg = Debug|x86
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x86.Build.0 = Debug|x86
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x64.ActiveCfg = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x64.Build.0 = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x86.ActiveCfg = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Debug|x86.Build.0 = Debug|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|Any CPU.Build.0 = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x64.ActiveCfg = Release|x64
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x64.Build.0 = Release|x64
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x86.ActiveCfg = Release|x86
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x86.Build.0 = Release|x86
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x64.ActiveCfg = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x64.Build.0 = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x86.ActiveCfg = Release|Any CPU
{8DD3694E-9532-4659-AA9F-BB01CBB9341B}.Release|x86.Build.0 = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|x64.ActiveCfg = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|x64.Build.0 = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|x86.ActiveCfg = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Debug|x86.Build.0 = Debug|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|Any CPU.Build.0 = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|x64.ActiveCfg = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|x64.Build.0 = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|x86.ActiveCfg = Release|Any CPU
{6ACD682F-6E1B-4C7E-B60E-66302AD9E2E3}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
20 changes: 15 additions & 5 deletions ZstdNet/ArraySegmentPtr.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,26 @@ namespace ZstdNet
internal struct ArraySegmentPtr : IDisposable
{
public ArraySegmentPtr(ArraySegment<byte> segment)
: this(segment.Array, segment.Offset, segment.Count)
{}

public ArraySegmentPtr(byte[] array)
: this(array, 0, array.Length)
{}

public ArraySegmentPtr(byte[] buffer, int offset, int count)
{
handle = GCHandle.Alloc(segment.Array, GCHandleType.Pinned);
arr = segment.Array;
offset = segment.Offset;
handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
this.offset = offset;
Length = count;
}

public byte[] Array => handle.Target as byte[];
public readonly int Length;

public static implicit operator IntPtr(ArraySegmentPtr pinner)
{
return Marshal.UnsafeAddrOfPinnedArrayElement(pinner.arr, pinner.offset);
return Marshal.UnsafeAddrOfPinnedArrayElement(pinner.Array, pinner.offset);
}

public void Dispose()
Expand All @@ -23,7 +34,6 @@ public void Dispose()
}

private GCHandle handle;
private readonly byte[] arr;
private readonly int offset;
}
}
2 changes: 2 additions & 0 deletions ZstdNet/CompressionOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ public static int MaxCompressionLevel

public const int DefaultCompressionLevel = 3; // Used by zstd utility by default

internal static CompressionOptions DefaultCompressionOptions { get; } = new CompressionOptions(DefaultCompressionLevel);

public readonly int CompressionLevel;
public readonly byte[] Dictionary;

Expand Down
4 changes: 2 additions & 2 deletions ZstdNet/Compressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace ZstdNet
public class Compressor : IDisposable
{
public Compressor()
: this(new CompressionOptions(CompressionOptions.DefaultCompressionLevel))
: this(CompressionOptions.DefaultCompressionOptions)
{ }

public Compressor(CompressionOptions options)
Expand Down Expand Up @@ -82,7 +82,7 @@ public int Wrap(ArraySegment<byte> src, byte[] dst, int offset)
var dstCapacity = dst.Length - offset;
size_t dstSize;
using(var srcPtr = new ArraySegmentPtr(src))
using(var dstPtr = new ArraySegmentPtr(new ArraySegment<byte>(dst, offset, dstCapacity)))
using(var dstPtr = new ArraySegmentPtr(dst, offset, dstCapacity))
{
if(Options.Cdict == IntPtr.Zero)
dstSize = ExternMethods.ZSTD_compressCCtx(cctx, dstPtr, (size_t)dstCapacity, srcPtr, (size_t)src.Count, Options.CompressionLevel);
Expand Down
Loading

0 comments on commit 963af94

Please sign in to comment.