Skip to content

Commit fa6bf38

Browse files
iremyux and Copilot authored
Add compression magic number detection to System.Formats.Tar with helpful error messages (#119996)
This PR improves error messages when users attempt to read compressed TAR archives. When TAR header parsing fails due to an invalid checksum field, the code now checks if the file might be a compressed archive (GZIP, BZIP2, XZ, ZIP, ZLIB, 7-Zip, or Zstandard) by examining compression magic numbers. If a compression format is detected, it throws a clear, localized error message indicating the specific compression format detected (e.g., "The file appears to be a GZIP archive. TAR format expected.").

Fixes #89056

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent cf9869e commit fa6bf38

File tree

3 files changed

+107
-3
lines changed

3 files changed

+107
-3
lines changed

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@
188188
<value>An attempt was made to move the position before the beginning of the stream.</value>
189189
</data>
190190
<data name="TarInvalidNumber" xml:space="preserve">
191-
<value>Unable to parse number.</value>
191+
<value>The TAR archive is corrupted or invalid.</value>
192192
</data>
193193
<data name="TarInvalidChecksum" xml:space="preserve">
194194
<value>Checksum validation failed. The archive might be corrupted.</value>
@@ -211,4 +211,7 @@
211211
<data name="ExtHeaderInvalidRecords" xml:space="preserve">
212212
<value>The extended header contains invalid records.</value>
213213
</data>
214+
<data name="TarCompressionArchiveDetected" xml:space="preserve">
215+
<value>The file appears to be a {0} archive. TAR format expected.</value>
216+
</data>
214217
</root>

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,20 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
370370
{
371371
return null;
372372
}
373-
int checksum = (int)TarHelpers.ParseOctal<uint>(spanChecksum);
373+
374+
int checksum;
375+
try
376+
{
377+
checksum = (int)TarHelpers.ParseOctal<uint>(spanChecksum);
378+
}
379+
catch (InvalidDataException)
380+
{
381+
// Check if this might be a compressed file by looking at the buffer for compression magic numbers
382+
ThrowIfCompressedArchive(buffer);
383+
// If not a compressed file, re-throw the original parsing exception
384+
throw;
385+
}
386+
374387
// Zero checksum means the whole header is empty
375388
if (checksum == 0)
376389
{
@@ -789,5 +802,93 @@ private static bool TryGetNextExtendedAttribute(
789802
buffer = buffer.Slice(newlinePos + 1);
790803
return true;
791804
}
805+
806+
/// <summary>
/// Inspects the leading bytes of <paramref name="buffer"/> for the signature of a well-known
/// compressed or archive format (GZIP, ZIP, BZIP2, XZ, 7-Zip, Zstandard or ZLIB).
/// When a signature is recognized, an <see cref="InvalidDataException"/> naming that format is thrown;
/// otherwise the method returns normally.
/// </summary>
/// <param name="buffer">The bytes to inspect. Only the first six bytes, at most, are examined.</param>
/// <exception cref="InvalidDataException">
/// The buffer starts with the signature of one of the recognized formats.
/// </exception>
private static void ThrowIfCompressedArchive(ReadOnlySpan<byte> buffer)
{
    int length = buffer.Length;

    // The shortest signature handled here is two bytes long.
    if (length < 2)
    {
        return;
    }

    byte lead = buffer[0];
    byte second = buffer[1];
    string formatName;

    if (lead == 0x1F && second == 0x8B)
    {
        formatName = "GZIP";
    }
    else if (lead == 0x50 && second == 0x4B)
    {
        // ZIP files start with "PK".
        formatName = "ZIP";
    }
    else if (lead == 0x42 && second == 0x5A &&
             length >= 3 && buffer[2] == 0x68)
    {
        // "BZh"
        formatName = "BZIP2";
    }
    else if (lead == 0xFD && length >= 6 &&
             second == 0x37 && buffer[2] == 0x7A &&
             buffer[3] == 0x58 && buffer[4] == 0x5A && buffer[5] == 0x00)
    {
        formatName = "XZ";
    }
    else if (lead == 0x37 && length >= 6 &&
             second == 0x7A && buffer[2] == 0xBC &&
             buffer[3] == 0xAF && buffer[4] == 0x27 && buffer[5] == 0x1C)
    {
        formatName = "7-Zip";
    }
    else if (lead == 0x28 && length >= 4 &&
             second == 0xB5 && buffer[2] == 0x2F && buffer[3] == 0xFD)
    {
        // Must be tested before ZLIB: 0x28 is also a possible ZLIB lead byte.
        formatName = "Zstandard";
    }
    else if ((lead & 0x8F) == 0x08 && ((lead << 8) | second) % 31 == 0)
    {
        // ZLIB lead bytes are 0x08, 0x18, ..., 0x78 (low nibble 8, high bit clear), and the
        // first two bytes read as a big-endian 16-bit value must be divisible by 31.
        formatName = "ZLIB";
    }
    else
    {
        // No known signature: let the caller report its own error.
        return;
    }

    throw new InvalidDataException(SR.Format(SR.TarCompressionArchiveDetected, formatName));
}
792893
}
793894
}

src/libraries/System.Formats.Tar/tests/TarReader/TarReader.Tests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ public void TarReader_InvalidChecksum_ThrowsException(bool corrupted)
159159

160160
if (corrupted)
161161
{
162-
Assert.Contains("parse", exception.Message);
162+
Assert.Contains("corrupted", exception.Message);
163163
}
164164
else
165165
{

0 commit comments

Comments
 (0)