Skip to content

Commit c5a0e55

Browse files
authored
Add tests for exotic external tar asset archives, fix some more corner case bugs (#74412)
* Remove unused _readFirstEntry. Remnant from before we created PaxGlobalExtendedAttributesEntry. * Set the position of the freshly copied data stream to 0, so the first user access of the DataStream property gives them a stream ready to read from the beginning. * Process a PAX actual entry's data block only after the extended attributes are analyzed, in case the size is found as an extended attribute and needs to be overriden. * Add tests to verify the entries of the new external tar assets can be read. Verify their DataStream if available. * Add copyData argument to recent alignment padding tests. * Throw an exception sooner and with a clearer message when a data section is unexpected for the entry type. * Allow trailing nulls and spaces in octal fields. Co-authored-by: @am11 Adeel Mujahid <3840695+am11@users.noreply.github.com> * Throw a clearer exception if the unsupported sparse file entry type is encountered. These entries have additional data that indicates the locations of sparse bytes, which cannot be read with just the size field. So to avoid accidentally offseting the reader, we throw. * Tests. * Rename to TrimLeadingNullsAndSpaces Co-authored-by: carlossanlop <carlossanlop@users.noreply.github.com>
1 parent 97613f3 commit c5a0e55

File tree

9 files changed

+705
-65
lines changed

9 files changed

+705
-65
lines changed

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@
205205
<value>The entry is a symbolic link or a hard link but the LinkName field is null or empty.</value>
206206
</data>
207207
<data name="TarEntryTypeNotSupported" xml:space="preserve">
208+
<value>Entry type '{0}' not supported.</value>
209+
</data>
210+
<data name="TarEntryTypeNotSupportedInFormat" xml:space="preserve">
208211
<value>Entry type '{0}' not supported in format '{1}'.</value>
209212
</data>
210213
<data name="TarEntryTypeNotSupportedForExtracting" xml:space="preserve">

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ internal sealed partial class TarHeader
2020
// Attempts to retrieve the next header from the specified tar archive stream.
2121
// Throws if end of stream is reached or if any data type conversion fails.
2222
// Returns a valid TarHeader object if the attributes were read successfully, null otherwise.
23-
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat)
23+
internal static TarHeader? TryGetNextHeader(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock)
2424
{
2525
// The four supported formats have a header that fits in the default record size
2626
Span<byte> buffer = stackalloc byte[TarHelpers.RecordSize];
2727

2828
archiveStream.ReadExactly(buffer);
2929

3030
TarHeader? header = TryReadAttributes(initialFormat, buffer);
31-
if (header != null)
31+
if (header != null && processDataBlock)
3232
{
3333
header.ProcessDataBlock(archiveStream, copyData);
3434
}
@@ -39,7 +39,7 @@ internal sealed partial class TarHeader
3939
// Asynchronously attempts read all the fields of the next header.
4040
// Throws if end of stream is reached or if any data type conversion fails.
4141
// Returns true if all the attributes were read successfully, false otherwise.
42-
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, CancellationToken cancellationToken)
42+
internal static async ValueTask<TarHeader?> TryGetNextHeaderAsync(Stream archiveStream, bool copyData, TarEntryFormat initialFormat, bool processDataBlock, CancellationToken cancellationToken)
4343
{
4444
cancellationToken.ThrowIfCancellationRequested();
4545

@@ -50,7 +50,7 @@ internal sealed partial class TarHeader
5050
await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false);
5151

5252
TarHeader? header = TryReadAttributes(initialFormat, buffer.Span);
53-
if (header != null)
53+
if (header != null && processDataBlock)
5454
{
5555
await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false);
5656
}
@@ -180,7 +180,7 @@ internal void ReplaceNormalAttributesWithExtended(Dictionary<string, string>? di
180180
// will get all the data section read and the stream pointer positioned at the beginning of the next header.
181181
// - Block, Character, Directory, Fifo, HardLink and SymbolicLink typeflag entries have no data section so the archive stream pointer will be positioned at the beginning of the next header.
182182
// - All other typeflag entries with a data section will generate a stream wrapping the data section: SeekableSubReadStream for seekable archive streams, and SubReadStream for unseekable archive streams.
183-
private void ProcessDataBlock(Stream archiveStream, bool copyData)
183+
internal void ProcessDataBlock(Stream archiveStream, bool copyData)
184184
{
185185
bool skipBlockAlignmentPadding = true;
186186

@@ -199,6 +199,10 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData)
199199
case TarEntryType.HardLink:
200200
case TarEntryType.SymbolicLink:
201201
// No data section
202+
if (_size > 0)
203+
{
204+
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
205+
}
202206
break;
203207
case TarEntryType.RegularFile:
204208
case TarEntryType.V7RegularFile: // Treated as regular file
@@ -257,6 +261,10 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
257261
case TarEntryType.HardLink:
258262
case TarEntryType.SymbolicLink:
259263
// No data section
264+
if (_size > 0)
265+
{
266+
throw new FormatException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag));
267+
}
260268
break;
261269
case TarEntryType.RegularFile:
262270
case TarEntryType.V7RegularFile: // Treated as regular file
@@ -311,6 +319,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
311319
{
312320
MemoryStream copiedData = new MemoryStream();
313321
TarHelpers.CopyBytes(archiveStream, copiedData, _size);
322+
// Reset position pointer so the user can do the first DataStream read from the beginning
323+
copiedData.Position = 0;
314324
return copiedData;
315325
}
316326

@@ -336,6 +346,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
336346
{
337347
MemoryStream copiedData = new MemoryStream();
338348
await TarHelpers.CopyBytesAsync(archiveStream, copiedData, size, cancellationToken).ConfigureAwait(false);
349+
// Reset position pointer so the user can do the first DataStream read from the beginning
350+
copiedData.Position = 0;
339351
return copiedData;
340352
}
341353

@@ -396,12 +408,13 @@ TarEntryType.LongLink or
396408
TarEntryType.LongPath or
397409
TarEntryType.MultiVolume or
398410
TarEntryType.RenamedOrSymlinked or
399-
TarEntryType.SparseFile or
400411
TarEntryType.TapeVolume => TarEntryFormat.Gnu,
401412

402413
// V7 is the only one that uses 'V7RegularFile'.
403414
TarEntryType.V7RegularFile => TarEntryFormat.V7,
404415

416+
TarEntryType.SparseFile => throw new NotSupportedException(string.Format(SR.TarEntryTypeNotSupported, header._typeFlag)),
417+
405418
// We can quickly determine the *minimum* possible format if the entry type
406419
// is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU
407420
_ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,12 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
203203
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
204204
{
205205
buffer = TrimEndingNullsAndSpaces(buffer);
206+
buffer = TrimLeadingNullsAndSpaces(buffer);
207+
208+
if (buffer.Length == 0)
209+
{
210+
return T.Zero;
211+
}
206212

207213
T octalFactor = T.CreateTruncating(8u);
208214
T value = T.Zero;
@@ -243,6 +249,17 @@ internal static ReadOnlySpan<byte> TrimEndingNullsAndSpaces(ReadOnlySpan<byte> b
243249
return buffer.Slice(0, trimmedLength);
244250
}
245251

252+
private static ReadOnlySpan<byte> TrimLeadingNullsAndSpaces(ReadOnlySpan<byte> buffer)
253+
{
254+
int newStart = 0;
255+
while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
256+
{
257+
newStart++;
258+
}
259+
260+
return buffer.Slice(newStart);
261+
}
262+
246263
// Returns the ASCII string contained in the specified buffer of bytes,
247264
// removing the trailing null or space chars.
248265
internal static string GetTrimmedAsciiString(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.ASCII);
@@ -351,7 +368,7 @@ TarEntryType.RegularFile or
351368
throw new FormatException(string.Format(SR.TarInvalidFormat, archiveFormat));
352369
}
353370

354-
throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupported, entryType, archiveFormat));
371+
throw new InvalidOperationException(string.Format(SR.TarEntryTypeNotSupportedInFormat, entryType, archiveFormat));
355372
}
356373
}
357374
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ public sealed class TarReader : IDisposable, IAsyncDisposable
1919
private readonly bool _leaveOpen;
2020
private TarEntry? _previouslyReadEntry;
2121
private List<Stream>? _dataStreamsToDispose;
22-
private bool _readFirstEntry;
2322
private bool _reachedEndMarkers;
2423

2524
internal Stream _archiveStream;
@@ -44,7 +43,6 @@ public TarReader(Stream archiveStream, bool leaveOpen = false)
4443

4544
_previouslyReadEntry = null;
4645
_isDisposed = false;
47-
_readFirstEntry = false;
4846
_reachedEndMarkers = false;
4947
}
5048

@@ -124,11 +122,6 @@ public async ValueTask DisposeAsync()
124122
TarHeader? header = TryGetNextEntryHeader(copyData);
125123
if (header != null)
126124
{
127-
if (!_readFirstEntry)
128-
{
129-
_readFirstEntry = true;
130-
}
131-
132125
TarEntry entry = header._format switch
133126
{
134127
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
@@ -282,11 +275,6 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
282275
TarHeader? header = await TryGetNextEntryHeaderAsync(copyData, cancellationToken).ConfigureAwait(false);
283276
if (header != null)
284277
{
285-
if (!_readFirstEntry)
286-
{
287-
_readFirstEntry = true;
288-
}
289-
290278
TarEntry entry = header._format switch
291279
{
292280
TarEntryFormat.Pax => header._typeFlag is TarEntryType.GlobalExtendedAttributes ?
@@ -319,7 +307,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
319307
{
320308
Debug.Assert(!_reachedEndMarkers);
321309

322-
TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown);
310+
TarHeader? header = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true);
323311

324312
if (header == null)
325313
{
@@ -361,7 +349,7 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
361349

362350
Debug.Assert(!_reachedEndMarkers);
363351

364-
TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, cancellationToken).ConfigureAwait(false);
352+
TarHeader? header = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Unknown, processDataBlock: true, cancellationToken).ConfigureAwait(false);
365353
if (header == null)
366354
{
367355
return null;
@@ -397,9 +385,10 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
397385
// and returns the actual entry with the processed extended attributes saved in the _extendedAttributes dictionary.
398386
private bool TryProcessExtendedAttributesHeader(TarHeader extendedAttributesHeader, bool copyData, [NotNullWhen(returnValue: true)] out TarHeader? actualHeader)
399387
{
400-
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax);
388+
// Don't process the data block of the actual entry just yet, because there's a slim chance
389+
// that the extended attributes contain a size that we need to override in the header
390+
actualHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false);
401391

402-
// Now get the actual entry
403392
if (actualHeader == null)
404393
{
405394
return false;
@@ -417,6 +406,9 @@ TarEntryType.LongLink or
417406
// Replace all the attributes representing standard fields with the extended ones, if any
418407
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);
419408

409+
// We retrieved the extended attributes, now we can read the data, and always with the right size
410+
actualHeader.ProcessDataBlock(_archiveStream, copyData);
411+
420412
return true;
421413
}
422414

@@ -426,8 +418,9 @@ TarEntryType.LongLink or
426418
{
427419
cancellationToken.ThrowIfCancellationRequested();
428420

429-
// Now get the actual entry
430-
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, cancellationToken).ConfigureAwait(false);
421+
// Don't process the data block of the actual entry just yet, because there's a slim chance
422+
// that the extended attributes contain a size that we need to override in the header
423+
TarHeader? actualHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Pax, processDataBlock: false, cancellationToken).ConfigureAwait(false);
431424
if (actualHeader == null)
432425
{
433426
return null;
@@ -451,6 +444,9 @@ TarEntryType.LongLink or
451444
// Replace all the attributes representing standard fields with the extended ones, if any
452445
actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes);
453446

447+
// We retrieved the extended attributes, now we can read the data, and always with the right size
448+
actualHeader.ProcessDataBlock(_archiveStream, copyData);
449+
454450
return actualHeader;
455451
}
456452

@@ -460,7 +456,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
460456
{
461457
finalHeader = new(TarEntryFormat.Gnu);
462458

463-
TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
459+
TarHeader? secondHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);
464460

465461
// Get the second entry, which is the actual entry
466462
if (secondHeader == null)
@@ -478,7 +474,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
478474
if ((header._typeFlag is TarEntryType.LongLink && secondHeader._typeFlag is TarEntryType.LongPath) ||
479475
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
480476
{
481-
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu);
477+
TarHeader? thirdHeader = TarHeader.TryGetNextHeader(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true);
482478

483479
// Get the third entry, which is the actual entry
484480
if (thirdHeader == null)
@@ -537,7 +533,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
537533
cancellationToken.ThrowIfCancellationRequested();
538534

539535
// Get the second entry, which is the actual entry
540-
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
536+
TarHeader? secondHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
541537
if (secondHeader == null)
542538
{
543539
return null;
@@ -556,7 +552,7 @@ private bool TryProcessGnuMetadataHeader(TarHeader header, bool copyData, out Ta
556552
(header._typeFlag is TarEntryType.LongPath && secondHeader._typeFlag is TarEntryType.LongLink))
557553
{
558554
// Get the third entry, which is the actual entry
559-
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, cancellationToken).ConfigureAwait(false);
555+
TarHeader? thirdHeader = await TarHeader.TryGetNextHeaderAsync(_archiveStream, copyData, TarEntryFormat.Gnu, processDataBlock: true, cancellationToken).ConfigureAwait(false);
560556
if (thirdHeader == null)
561557
{
562558
return null;

0 commit comments

Comments
 (0)