Skip to content

Commit

Permalink
Tar: Fix PAX regression when handling the size of really long unseeka…
Browse files Browse the repository at this point in the history
…ble data streams (#88280)

* Fix regression introduced by #84279 that prevented PAX entries with really long data streams from getting their size correctly stored in the extended attributes when the data stream is unseekable.

* Move tests for large files to a new manual tests project.
  • Loading branch information
carlossanlop authored Jul 17, 2023
1 parent bf78b40 commit 2268fb3
Show file tree
Hide file tree
Showing 9 changed files with 450 additions and 303 deletions.
9 changes: 9 additions & 0 deletions src/libraries/System.Formats.Tar/System.Formats.Tar.sln
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Formats.Tar", "src\S
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Formats.Tar.Tests", "tests\System.Formats.Tar.Tests.csproj", "{6FD1E284-7B50-4077-B73A-5B31CB0E3577}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Formats.Tar.Manual.Tests", "tests\Manual\System.Formats.Tar.Manual.Tests.csproj", "{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ComInterfaceGenerator", "..\System.Runtime.InteropServices\gen\ComInterfaceGenerator\ComInterfaceGenerator.csproj", "{00477EA4-C3E5-48A9-8CA8-8CCF689E0DB4}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LibraryImportGenerator", "..\System.Runtime.InteropServices\gen\LibraryImportGenerator\LibraryImportGenerator.csproj", "{E89FEF3E-E0B9-41C4-A51C-9759AD1A3B69}"
Expand Down Expand Up @@ -67,6 +69,10 @@ Global
{A00011A0-E609-4A49-B893-EBFC72C98707}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A00011A0-E609-4A49-B893-EBFC72C98707}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A00011A0-E609-4A49-B893-EBFC72C98707}.Release|Any CPU.Build.0 = Release|Any CPU
{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -78,9 +84,12 @@ Global
{E0B882C6-2082-45F2-806E-568461A61975} = {9BE8AFF4-D37B-49AF-AFD3-A15E514AC8AE}
{A00011A0-E609-4A49-B893-EBFC72C98707} = {9BE8AFF4-D37B-49AF-AFD3-A15E514AC8AE}
{9F751C2B-56DD-4604-A3F3-568627F8C006} = {55A8C7E4-925C-4F21-B68B-CEFC19137A4B}
{6FD1E284-7B50-4077-B73A-5B31CB0E3577} = {6CF0D830-3EE9-44B1-B548-EA8750AD7B3E}
{00477EA4-C3E5-48A9-8CA8-8CCF689E0DB4} = {0345BAA8-92BC-4499-B550-21AC44910FD2}
{E89FEF3E-E0B9-41C4-A51C-9759AD1A3B69} = {0345BAA8-92BC-4499-B550-21AC44910FD2}
{50E6D5FD-0E06-4D07-966E-C28E5448A1D3} = {0345BAA8-92BC-4499-B550-21AC44910FD2}
{A00011A0-E609-4A49-B893-EBFC72C98707} = {9BE8AFF4-D37B-49AF-AFD3-A15E514AC8AE}
{D2788A26-CDAE-4388-AE4B-A36B0E6DFF9D} = {6CF0D830-3EE9-44B1-B548-EA8750AD7B3E}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {F9B8DA67-C83B-466D-907C-9541CDBDCFEF}
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,12 @@ private void WriteEntryInternal(TarEntry entry)

switch (entry.Format)
{
case TarEntryFormat.V7 or TarEntryFormat.Ustar:
entry._header.WriteAs(entry.Format, _archiveStream, buffer);
case TarEntryFormat.V7:
entry._header.WriteAsV7(_archiveStream, buffer);
break;

case TarEntryFormat.Ustar:
entry._header.WriteAsUstar(_archiveStream, buffer);
break;

case TarEntryFormat.Pax:
Expand Down Expand Up @@ -321,7 +325,8 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can

Task task = entry.Format switch
{
TarEntryFormat.V7 or TarEntryFormat.Ustar => entry._header.WriteAsAsync(entry.Format, _archiveStream, buffer, cancellationToken),
TarEntryFormat.V7 => entry._header.WriteAsV7Async(_archiveStream, buffer, cancellationToken),
TarEntryFormat.Ustar => entry._header.WriteAsUstarAsync(_archiveStream, buffer, cancellationToken),
TarEntryFormat.Pax when entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes => entry._header.WriteAsPaxGlobalExtendedAttributesAsync(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++, cancellationToken),
TarEntryFormat.Pax => entry._header.WriteAsPaxAsync(_archiveStream, buffer, cancellationToken),
TarEntryFormat.Gnu => entry._header.WriteAsGnuAsync(_archiveStream, buffer, cancellationToken),
Expand Down
93 changes: 93 additions & 0 deletions src/libraries/System.Formats.Tar/tests/Manual/ManualTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.IO;
using Xunit;

namespace System.Formats.Tar.Tests;

/// <summary>
/// Opt-in tests that write a very large entry into a tar archive on disk and read it back.
/// They only run when the MANUAL_TESTS environment variable is set to a non-empty value.
/// </summary>
[OuterLoop]
[Collection(nameof(DisableParallelization))] // run serially so several huge files never exist at the same time
public class ManualTests : TarTestsBase
{
    /// <summary>Condition member for the theories: true when MANUAL_TESTS is set and non-empty.</summary>
    public static bool ManualTestsEnabled => Environment.GetEnvironmentVariable("MANUAL_TESTS") is { Length: > 0 };

    /// <summary>
    /// Produces (entry format, entry size, unseekable archive stream) combinations.
    /// Every format is tried at <c>LegacyMaxFileSize</c>; PAX is additionally tried one byte above it.
    /// </summary>
    public static IEnumerable<object[]> WriteEntry_LongFileSize_TheoryData()
    {
        bool[] unseekableChoices = { false, true };
        TarEntryFormat[] formats = { TarEntryFormat.V7, TarEntryFormat.Ustar, TarEntryFormat.Gnu, TarEntryFormat.Pax };

        foreach (bool unseekableStream in unseekableChoices)
        {
            foreach (TarEntryFormat entryFormat in formats)
            {
                yield return new object[] { entryFormat, LegacyMaxFileSize, unseekableStream };
            }

            // PAX has no size ceiling, so it is the only format tested past the legacy limit.
            yield return new object[] { TarEntryFormat.Pax, LegacyMaxFileSize + 1, unseekableStream };
        }
    }

    /// <summary>
    /// Writes one regular-file entry of <paramref name="size"/> bytes, reads the archive back and
    /// checks the reported length plus the dummy data found at the start and at the end of the entry.
    /// </summary>
    /// <param name="entryFormat">Format used to create the written entry.</param>
    /// <param name="size">Length in bytes of the simulated entry data.</param>
    /// <param name="unseekableStream">When true, the archive file is hidden behind a non-seekable wrapper.</param>
    [ConditionalTheory(nameof(ManualTestsEnabled))]
    [MemberData(nameof(WriteEntry_LongFileSize_TheoryData))]
    [SkipOnPlatform(TestPlatforms.iOS | TestPlatforms.tvOS | TestPlatforms.Android | TestPlatforms.Browser, "Needs too much disk space.")]
    public void WriteEntry_LongFileSize(TarEntryFormat entryFormat, long size, bool unseekableStream)
    {
        // The archive lives in a temporary file that is deleted when the stream is closed.
        string archivePath = GetTestFilePath();
        FileStreamOptions archiveOptions = new()
        {
            Access = FileAccess.ReadWrite,
            Mode = FileMode.Create,
            Options = FileOptions.DeleteOnClose
        };
        using FileStream tarFile = File.Open(archivePath, archiveOptions);

        Stream archive = tarFile;
        if (unseekableStream)
        {
            archive = new WrappedStream(tarFile, tarFile.CanRead, tarFile.CanWrite, canSeek: false);
        }

        // V7 has its own regular-file entry type; every other format uses RegularFile.
        TarEntryType fileType = TarEntryType.RegularFile;
        if (entryFormat is TarEntryFormat.V7)
        {
            fileType = TarEntryType.V7RegularFile;
        }

        // Write the archive with a single entry of about 8 GB.
        using (TarWriter writer = new(archive, leaveOpen: true))
        {
            TarEntry entryToWrite = InvokeTarEntryCreationConstructor(entryFormat, fileType, "foo");
            entryToWrite.DataStream = new SimulatedDataStream(size);
            writer.WriteEntry(entryToWrite);
        }

        // Rewind through the file itself: the wrapper may not allow seeking.
        tarFile.Position = 0;

        // Read the archive back.
        using TarReader reader = new TarReader(archive);
        TarEntry readEntry = reader.GetNextEntry();
        Assert.Equal(size, readEntry.Length);

        Stream data = readEntry.DataStream;
        Assert.Equal(size, data.Length);
        Assert.Equal(0, data.Position);

        ReadOnlySpan<byte> expected = SimulatedDataStream.DummyData.Span;
        Span<byte> actual = new byte[expected.Length];

        // Leading bytes: the dummy data, followed by a zero byte.
        int leadingCount = data.Read(actual);
        Assert.Equal(actual.Length, leadingCount);
        AssertExtensions.SequenceEqual(expected, actual);
        Assert.Equal(0, data.ReadByte()); // check next byte is correct.
        actual.Clear();

        // Trailing bytes: move to one byte before the final copy of the dummy data.
        long trailingOffset = size - expected.Length - 1;
        if (!data.CanSeek)
        {
            Assert.True(unseekableStream);
            ReadForwardUntil(data, trailingOffset);
        }
        else
        {
            Assert.False(unseekableStream);
            data.Seek(trailingOffset, SeekOrigin.Begin);
        }

        Assert.Equal(0, data.ReadByte()); // check previous byte is correct.
        int trailingCount = data.Read(actual);
        Assert.Equal(actual.Length, trailingCount);
        AssertExtensions.SequenceEqual(expected, actual);
        Assert.Equal(size, data.Position);

        Assert.Null(reader.GetNextEntry());
    }

    // Advances a non-seekable stream by reading and discarding data until its position reaches targetPosition.
    private static void ReadForwardUntil(Stream stream, long targetPosition)
    {
        Span<byte> scratch = new byte[4_096];

        while (true)
        {
            long remaining = targetPosition - stream.Position;
            if (remaining <= 0)
            {
                return;
            }

            int chunkSize = (int)Math.Min(scratch.Length, remaining);
            int bytesRead = stream.Read(scratch[..chunkSize]);
            Assert.True(bytesRead > 0, "Unseekable stream finished before expected - Something went very wrong");
        }
    }
}
82 changes: 82 additions & 0 deletions src/libraries/System.Formats.Tar/tests/Manual/ManualTestsAsync.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Xunit;

namespace System.Formats.Tar.Tests;

/// <summary>
/// Asynchronous counterpart of <see cref="ManualTests"/>: writes a very large entry into a tar
/// archive on disk with the async APIs and reads it back. Runs only when
/// <see cref="ManualTests.ManualTestsEnabled"/> is true.
/// </summary>
[OuterLoop]
[Collection(nameof(DisableParallelization))] // don't create multiple large files at the same time
public class ManualTestsAsync : TarTestsBase
{
    /// <summary>Forwards to <see cref="ManualTests.WriteEntry_LongFileSize_TheoryData"/>.</summary>
    public static IEnumerable<object[]> WriteEntry_LongFileSize_TheoryDataAsync()
        // Fixes error xUnit1015: MemberData needs to be in the same class
        => ManualTests.WriteEntry_LongFileSize_TheoryData();

    /// <summary>
    /// Writes one regular-file entry of <paramref name="size"/> bytes, reads the archive back and
    /// checks the reported length plus the dummy data found at the start and at the end of the entry.
    /// </summary>
    /// <param name="entryFormat">Format used to create the written entry.</param>
    /// <param name="size">Length in bytes of the simulated entry data.</param>
    /// <param name="unseekableStream">When true, the archive file is hidden behind a non-seekable wrapper.</param>
    // ManualTestsEnabled is declared on ManualTests, which is not an ancestor of this class.
    // nameof() only yields the bare member name, so the declaring type must be passed explicitly
    // for the condition lookup to target the right class.
    [ConditionalTheory(typeof(ManualTests), nameof(ManualTests.ManualTestsEnabled))]
    [MemberData(nameof(WriteEntry_LongFileSize_TheoryDataAsync))]
    [SkipOnPlatform(TestPlatforms.iOS | TestPlatforms.tvOS | TestPlatforms.Android | TestPlatforms.Browser, "Needs too much disk space.")]
    public async Task WriteEntry_LongFileSizeAsync(TarEntryFormat entryFormat, long size, bool unseekableStream)
    {
        // Write an archive with an entry of about 8 GB. The backing file is deleted when it is closed.
        await using FileStream tarFile = File.Open(GetTestFilePath(), new FileStreamOptions { Access = FileAccess.ReadWrite, Mode = FileMode.Create, Options = FileOptions.DeleteOnClose });
        Stream s = unseekableStream ? new WrappedStream(tarFile, tarFile.CanRead, tarFile.CanWrite, canSeek: false) : tarFile;

        await using (TarWriter writer = new(s, leaveOpen: true))
        {
            // V7 has its own regular-file entry type; every other format uses RegularFile.
            TarEntry writeEntry = InvokeTarEntryCreationConstructor(entryFormat, entryFormat is TarEntryFormat.V7 ? TarEntryType.V7RegularFile : TarEntryType.RegularFile, "foo");
            writeEntry.DataStream = new SimulatedDataStream(size);
            await writer.WriteEntryAsync(writeEntry);
        }

        // Rewind through the file itself: the wrapper may not allow seeking.
        tarFile.Position = 0;

        // Read the archive back.
        await using TarReader reader = new TarReader(s);
        TarEntry entry = await reader.GetNextEntryAsync();
        Assert.Equal(size, entry.Length);

        Stream dataStream = entry.DataStream;
        Assert.Equal(size, dataStream.Length);
        Assert.Equal(0, dataStream.Position);

        ReadOnlyMemory<byte> dummyData = SimulatedDataStream.DummyData;

        // Read the first bytes (asynchronously, to stay on the async code path under test).
        byte[] buffer = new byte[dummyData.Length];
        Assert.Equal(buffer.Length, await dataStream.ReadAsync(buffer));
        AssertExtensions.SequenceEqual(dummyData.Span, buffer);
        Assert.Equal(0, dataStream.ReadByte()); // check next byte is correct.
        buffer.AsSpan().Clear();

        // Read the last bytes: move to one byte before the final copy of the dummy data.
        long dummyDataOffset = size - dummyData.Length - 1;
        if (dataStream.CanSeek)
        {
            Assert.False(unseekableStream);
            dataStream.Seek(dummyDataOffset, SeekOrigin.Begin);
        }
        else
        {
            Assert.True(unseekableStream);
            Memory<byte> seekBuffer = new byte[4_096];

            // No seeking available: consume and discard data until the target position is reached.
            while (dataStream.Position < dummyDataOffset)
            {
                int bufSize = (int)Math.Min(seekBuffer.Length, dummyDataOffset - dataStream.Position);
                int res = await dataStream.ReadAsync(seekBuffer.Slice(0, bufSize));
                Assert.True(res > 0, "Unseekable stream finished before expected - Something went very wrong");
            }
        }

        Assert.Equal(0, dataStream.ReadByte()); // check previous byte is correct.
        Assert.Equal(buffer.Length, await dataStream.ReadAsync(buffer));
        AssertExtensions.SequenceEqual(dummyData.Span, buffer);
        Assert.Equal(size, dataStream.Position);

        Assert.Null(await reader.GetNextEntryAsync());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project that compiles only the opt-in large-file tests (ManualTests / ManualTestsAsync) and the shared helpers they use. -->
<PropertyGroup>
<TargetFramework>$(NetCoreAppCurrent)</TargetFramework>
<!-- NOTE(review): presumably makes the RemoteExecutor test helper available; confirm the files compiled here actually need it. -->
<IncludeRemoteExecutor>true</IncludeRemoteExecutor>
</PropertyGroup>
<ItemGroup>
<!-- Test classes that live in this project's directory. -->
<Compile Include="ManualTests.cs" />
<Compile Include="ManualTestsAsync.cs" />
<!-- Sources taken from the parent test directory. -->
<Compile Include="..\TarTestsBase.cs" />
<Compile Include="..\SimulatedDataStream.cs" />
<!-- Helpers linked from the common test sources; the tests reference DisableParallelization and WrappedStream. -->
<Compile Include="$(CommonTestPath)TestUtilities\System\DisableParallelization.cs" Link="Common\TestUtilities\System\DisableParallelization.cs" />
<Compile Include="$(CommonTestPath)System\IO\TempDirectory.cs" Link="Common\System\IO\TempDirectory.cs" />
<Compile Include="$(CommonTestPath)System\IO\WrappedStream.cs" Link="Common\System\IO\WrappedStream.cs" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@
<Compile Include="TarTestsBase.Ustar.cs" />
<Compile Include="TarTestsBase.V7.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Entry.Roundtrip.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.LongFile.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.File.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Base.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.LongFile.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.Entry.Roundtrip.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.Entry.Ustar.Tests.cs" />
Expand All @@ -74,7 +72,6 @@
<Compile Include="$(CommonPath)DisableRuntimeMarshalling.cs" Link="Common\DisableRuntimeMarshalling.cs" />
<Compile Include="$(CommonTestPath)System\IO\ReparsePointUtilities.cs" Link="Common\System\IO\ReparsePointUtilities.cs" />
<Compile Include="$(CommonTestPath)System\IO\WrappedStream.cs" Link="Common\System\IO\WrappedStream.cs" />
<Compile Include="$(CommonTestPath)TestUtilities\System\DisableParallelization.cs" Link="Common\TestUtilities\System\DisableParallelization.cs" />
</ItemGroup>
<!-- Windows specific files -->
<ItemGroup Condition="'$(TargetPlatformIdentifier)' == 'windows'">
Expand Down

This file was deleted.

Loading

0 comments on commit 2268fb3

Please sign in to comment.