Skip to content

Commit 434e7f6

Browse files
authored
Properly handle filesizes larger than 8 Gb (#76707)
* Properly handle filesizes larger than 8 Gb * Add simulated data stream and address feedback * Move tests for 8Gb files to outerloop and disable parallelization * Change TarSizeFieldTooLargeForEntryType on .resx
1 parent 0cb25b3 commit 434e7f6

13 files changed

+402
-47
lines changed

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@
238238
<value>The size field is negative in a tar entry.</value>
239239
</data>
240240
<data name="TarSizeFieldTooLargeForEntryType" xml:space="preserve">
241-
<value>The value of the size field for the current entry of type '{0}' is beyond the expected length.</value>
241+
<value>The value of the size field for the current entry of type '{0}' is greater than the expected length.</value>
242242
</data>
243243
<data name="TarSymbolicLinkTargetNotExists" xml:space="preserve">
244244
<value>Cannot create the symbolic link '{0}' because the specified target '{1}' does not exist.</value>
@@ -264,4 +264,7 @@
264264
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
265265
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
266266
</data>
267+
<data name="TarSizeFieldTooLargeForEntryFormat" xml:space="preserve">
268+
<value>The value of the size field for the current entry of format '{0}' is greater than the format allows.</value>
269+
</data>
267270
</root>

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,8 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
376376
return null;
377377
}
378378

379-
long size = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
379+
long size = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
380+
Debug.Assert(size <= TarHelpers.MaxSizeLength, "size exceeded the max value possible with 11 octal digits. Actual size " + size);
380381
if (size < 0)
381382
{
382383
throw new InvalidDataException(string.Format(SR.TarSizeFieldNegative));

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs

Lines changed: 53 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ internal sealed partial class TarHeader
2828
// Writes the current header as a V7 entry into the archive stream.
2929
internal void WriteAsV7(Stream archiveStream, Span<byte> buffer)
3030
{
31-
long actualLength = WriteV7FieldsToBuffer(buffer);
31+
WriteV7FieldsToBuffer(buffer);
3232

3333
archiveStream.Write(buffer);
3434

3535
if (_dataStream != null)
3636
{
37-
WriteData(archiveStream, _dataStream, actualLength);
37+
WriteData(archiveStream, _dataStream, _size);
3838
}
3939
}
4040

@@ -43,39 +43,37 @@ internal async Task WriteAsV7Async(Stream archiveStream, Memory<byte> buffer, Ca
4343
{
4444
cancellationToken.ThrowIfCancellationRequested();
4545

46-
long actualLength = WriteV7FieldsToBuffer(buffer.Span);
46+
WriteV7FieldsToBuffer(buffer.Span);
4747

4848
await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
4949

5050
if (_dataStream != null)
5151
{
52-
await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false);
52+
await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false);
5353
}
5454
}
5555

5656
// Writes the V7 header fields to the specified buffer, calculates and writes the checksum, and stores the final data length in _size.
57-
private long WriteV7FieldsToBuffer(Span<byte> buffer)
57+
private void WriteV7FieldsToBuffer(Span<byte> buffer)
5858
{
59-
long actualLength = GetTotalDataBytesToWrite();
59+
_size = GetTotalDataBytesToWrite();
6060
TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag);
6161

6262
int tmpChecksum = WriteName(buffer);
63-
tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType);
63+
tmpChecksum += WriteCommonFields(buffer, actualEntryType);
6464
_checksum = WriteChecksum(tmpChecksum, buffer);
65-
66-
return actualLength;
6765
}
6866

6967
// Writes the current header as a Ustar entry into the archive stream.
7068
internal void WriteAsUstar(Stream archiveStream, Span<byte> buffer)
7169
{
72-
long actualLength = WriteUstarFieldsToBuffer(buffer);
70+
WriteUstarFieldsToBuffer(buffer);
7371

7472
archiveStream.Write(buffer);
7573

7674
if (_dataStream != null)
7775
{
78-
WriteData(archiveStream, _dataStream, actualLength);
76+
WriteData(archiveStream, _dataStream, _size);
7977
}
8078
}
8179

@@ -84,29 +82,27 @@ internal async Task WriteAsUstarAsync(Stream archiveStream, Memory<byte> buffer,
8482
{
8583
cancellationToken.ThrowIfCancellationRequested();
8684

87-
long actualLength = WriteUstarFieldsToBuffer(buffer.Span);
85+
WriteUstarFieldsToBuffer(buffer.Span);
8886

8987
await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
9088

9189
if (_dataStream != null)
9290
{
93-
await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false);
91+
await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false);
9492
}
9593
}
9694

9795
// Writes the Ustar header fields to the specified buffer, calculates and writes the checksum, and stores the final data length in _size.
98-
private long WriteUstarFieldsToBuffer(Span<byte> buffer)
96+
private void WriteUstarFieldsToBuffer(Span<byte> buffer)
9997
{
100-
long actualLength = GetTotalDataBytesToWrite();
98+
_size = GetTotalDataBytesToWrite();
10199
TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Ustar, _typeFlag);
102100

103101
int tmpChecksum = WriteUstarName(buffer);
104-
tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType);
102+
tmpChecksum += WriteCommonFields(buffer, actualEntryType);
105103
tmpChecksum += WritePosixMagicAndVersion(buffer);
106104
tmpChecksum += WritePosixAndGnuSharedFields(buffer);
107105
_checksum = WriteChecksum(tmpChecksum, buffer);
108-
109-
return actualLength;
110106
}
111107

112108
// Writes the current header as a PAX Global Extended Attributes entry into the archive stream.
@@ -144,6 +140,7 @@ internal void WriteAsPax(Stream archiveStream, Span<byte> buffer)
144140
// First, we write the preceding extended attributes header
145141
TarHeader extendedAttributesHeader = new(TarEntryFormat.Pax);
146142
// Fill the current header's dict
143+
_size = GetTotalDataBytesToWrite();
147144
CollectExtendedAttributesFromStandardFieldsIfNeeded();
148145
// And pass the attributes to the preceding extended attributes header for writing
149146
extendedAttributesHeader.WriteAsPaxExtendedAttributes(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1);
@@ -157,12 +154,12 @@ internal void WriteAsPax(Stream archiveStream, Span<byte> buffer)
157154
internal async Task WriteAsPaxAsync(Stream archiveStream, Memory<byte> buffer, CancellationToken cancellationToken)
158155
{
159156
Debug.Assert(_typeFlag is not TarEntryType.GlobalExtendedAttributes);
160-
161157
cancellationToken.ThrowIfCancellationRequested();
162158

163159
// First, we write the preceding extended attributes header
164160
TarHeader extendedAttributesHeader = new(TarEntryFormat.Pax);
165161
// Fill the current header's dict
162+
_size = GetTotalDataBytesToWrite();
166163
CollectExtendedAttributesFromStandardFieldsIfNeeded();
167164
// And pass the attributes to the preceding extended attributes header for writing
168165
await extendedAttributesHeader.WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1, cancellationToken).ConfigureAwait(false);
@@ -243,13 +240,13 @@ private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string
243240
// Writes the current header as a GNU entry into the archive stream.
244241
internal void WriteAsGnuInternal(Stream archiveStream, Span<byte> buffer)
245242
{
246-
WriteAsGnuSharedInternal(buffer, out long actualLength);
243+
WriteAsGnuSharedInternal(buffer);
247244

248245
archiveStream.Write(buffer);
249246

250247
if (_dataStream != null)
251248
{
252-
WriteData(archiveStream, _dataStream, actualLength);
249+
WriteData(archiveStream, _dataStream, _size);
253250
}
254251
}
255252

@@ -258,23 +255,23 @@ internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory<byte> b
258255
{
259256
cancellationToken.ThrowIfCancellationRequested();
260257

261-
WriteAsGnuSharedInternal(buffer.Span, out long actualLength);
258+
WriteAsGnuSharedInternal(buffer.Span);
262259

263260
await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
264261

265262
if (_dataStream != null)
266263
{
267-
await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false);
264+
await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false);
268265
}
269266
}
270267

271268
// Shared checksum and data length calculations for GNU entry writing.
272-
private void WriteAsGnuSharedInternal(Span<byte> buffer, out long actualLength)
269+
private void WriteAsGnuSharedInternal(Span<byte> buffer)
273270
{
274-
actualLength = GetTotalDataBytesToWrite();
271+
_size = GetTotalDataBytesToWrite();
275272

276273
int tmpChecksum = WriteName(buffer);
277-
tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag));
274+
tmpChecksum += WriteCommonFields(buffer, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Gnu, _typeFlag));
278275
tmpChecksum += WriteGnuMagicAndVersion(buffer);
279276
tmpChecksum += WritePosixAndGnuSharedFields(buffer);
280277
tmpChecksum += WriteGnuFields(buffer);
@@ -285,45 +282,44 @@ private void WriteAsGnuSharedInternal(Span<byte> buffer, out long actualLength)
285282
// Writes the current header as a PAX Extended Attributes entry into the archive stream.
286283
private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span<byte> buffer, Dictionary<string, string> extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber)
287284
{
288-
WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber);
289-
_dataStream = GenerateExtendedAttributesDataStream(extendedAttributes);
285+
WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber, extendedAttributes);
290286
WriteAsPaxInternal(archiveStream, buffer);
291287
}
292288

293289
// Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream; the returned task represents the write operation.
294290
private Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory<byte> buffer, Dictionary<string, string> extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken)
295291
{
296292
cancellationToken.ThrowIfCancellationRequested();
297-
298-
WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber);
299-
_dataStream = GenerateExtendedAttributesDataStream(extendedAttributes);
293+
WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber, extendedAttributes);
300294
return WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken);
301295
}
302296

303297
// Initializes the data stream, name, mode, size and type flag of a PAX extended attributes entry.
304-
private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAttributesEntryNumber)
298+
private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAttributesEntryNumber, Dictionary<string, string> extendedAttributes)
305299
{
306300
Debug.Assert(isGea && globalExtendedAttributesEntryNumber >= 0 || !isGea && globalExtendedAttributesEntryNumber < 0);
307301

302+
_dataStream = GenerateExtendedAttributesDataStream(extendedAttributes);
308303
_name = isGea ?
309304
GenerateGlobalExtendedAttributeName(globalExtendedAttributesEntryNumber) :
310305
GenerateExtendedAttributeName();
311306

312307
_mode = TarHelpers.GetDefaultMode(_typeFlag);
308+
_size = GetTotalDataBytesToWrite();
313309
_typeFlag = isGea ? TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes;
314310
}
315311

316312
// Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes
317313
// This method writes an entry as both entries require, using the data from the current header instance.
318314
private void WriteAsPaxInternal(Stream archiveStream, Span<byte> buffer)
319315
{
320-
WriteAsPaxSharedInternal(buffer, out long actualLength);
316+
WriteAsPaxSharedInternal(buffer);
321317

322318
archiveStream.Write(buffer);
323319

324320
if (_dataStream != null)
325321
{
326-
WriteData(archiveStream, _dataStream, actualLength);
322+
WriteData(archiveStream, _dataStream, _size);
327323
}
328324
}
329325

@@ -333,23 +329,21 @@ private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory<byte> bu
333329
{
334330
cancellationToken.ThrowIfCancellationRequested();
335331

336-
WriteAsPaxSharedInternal(buffer.Span, out long actualLength);
332+
WriteAsPaxSharedInternal(buffer.Span);
337333

338334
await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false);
339335

340336
if (_dataStream != null)
341337
{
342-
await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false);
338+
await WriteDataAsync(archiveStream, _dataStream, _size, cancellationToken).ConfigureAwait(false);
343339
}
344340
}
345341

346342
// Shared header field writing and checksum calculation for PAX entry writing. The data length is expected to be already stored in _size by the caller.
347-
private void WriteAsPaxSharedInternal(Span<byte> buffer, out long actualLength)
343+
private void WriteAsPaxSharedInternal(Span<byte> buffer)
348344
{
349-
actualLength = GetTotalDataBytesToWrite();
350-
351345
int tmpChecksum = WriteName(buffer);
352-
tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Pax, _typeFlag));
346+
tmpChecksum += WriteCommonFields(buffer, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Pax, _typeFlag));
353347
tmpChecksum += WritePosixMagicAndVersion(buffer);
354348
tmpChecksum += WritePosixAndGnuSharedFields(buffer);
355349

@@ -446,7 +440,7 @@ private int WriteUstarName(Span<byte> buffer)
446440
}
447441

448442
// Writes all the common fields shared by all formats into the specified buffer and returns their checksum contribution.
449-
private int WriteCommonFields(Span<byte> buffer, long actualLength, TarEntryType actualEntryType)
443+
private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)
450444
{
451445
// Don't write an empty LinkName if the entry is a hardlink or symlink
452446
Debug.Assert(!string.IsNullOrEmpty(_linkName) ^ (_typeFlag is not TarEntryType.SymbolicLink and not TarEntryType.HardLink));
@@ -468,11 +462,21 @@ private int WriteCommonFields(Span<byte> buffer, long actualLength, TarEntryType
468462
checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
469463
}
470464

471-
_size = actualLength;
472-
473465
if (_size > 0)
474466
{
475-
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
467+
if (_size <= TarHelpers.MaxSizeLength)
468+
{
469+
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
470+
}
471+
else if (_format is not TarEntryFormat.Pax)
472+
{
473+
throw new ArgumentException(SR.Format(SR.TarSizeFieldTooLargeForEntryFormat, _format));
474+
}
475+
else
476+
{
477+
Debug.Assert(_typeFlag is not TarEntryType.ExtendedAttributes and not TarEntryType.GlobalExtendedAttributes);
478+
Debug.Assert(Convert.ToInt64(ExtendedAttributes[PaxEaSize]) > TarHelpers.MaxSizeLength);
479+
}
476480
}
477481

478482
checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime));
@@ -732,10 +736,14 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
732736
ExtendedAttributes[PaxEaLinkName] = _linkName;
733737
}
734738

735-
if (_size > 99_999_999)
739+
if (_size > TarHelpers.MaxSizeLength)
736740
{
737741
ExtendedAttributes[PaxEaSize] = _size.ToString();
738742
}
743+
else
744+
{
745+
ExtendedAttributes.Remove(PaxEaSize);
746+
}
739747

740748
// Sets the specified string to the dictionary if it's longer than the specified max byte length; otherwise, remove it.
741749
static void TryAddStringField(Dictionary<string, string> extendedAttributes, string key, string? value, int maxLength)

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ internal static partial class TarHelpers
1919
{
2020
internal const short RecordSize = 512;
2121
internal const int MaxBufferLength = 4096;
22+
internal const long MaxSizeLength = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 bytes (one byte less than 8 GiB).
2223

2324
// Default mode for TarEntry created for a file-type.
2425
private const UnixFileMode DefaultFileMode =

0 commit comments

Comments
 (0)