Skip to content

Commit

Permalink
Merge pull request #2487 from mtrmac/chunked-bic2
Browse files Browse the repository at this point in the history
Record zstd:chunked format, and annotations, in BlobInfoCache
  • Loading branch information
rhatdan authored Aug 7, 2024
2 parents 55a2d1e + 3d38dae commit 8c7c58c
Show file tree
Hide file tree
Showing 15 changed files with 551 additions and 209 deletions.
120 changes: 69 additions & 51 deletions copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
}
stream.reader = reader

if decompressor != nil && format.Name() == compressiontypes.ZstdAlgorithmName {
tocDigest, err := chunkedToc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return bpDetectCompressionStepData{}, err
}
if tocDigest != nil {
format = compression.ZstdChunked
}

}
res := bpDetectCompressionStepData{
isCompressed: decompressor != nil,
format: format,
Expand All @@ -71,13 +81,14 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI

// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the uploaded blob.
uploadedCompressorSpecificVariantName string // Compressor specific variant name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
}

type bpcOperation int
Expand Down Expand Up @@ -129,11 +140,12 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
return &bpCompressionStepData{
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}, nil
}
return nil, nil
Expand All @@ -157,14 +169,19 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := uploadedAlgorithm.Name()
if specificVariantName == uploadedAlgorithm.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
return &bpCompressionStepData{
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: uploadedAlgorithm.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{reader},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -197,15 +214,20 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := ic.compressionFormat.Name()
if specificVariantName == ic.compressionFormat.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
succeeded = true
return &bpCompressionStepData{
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: ic.compressionFormat.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{decompressed, recompressed},
}, nil
}
return nil, nil
Expand All @@ -226,12 +248,13 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: internalblobinfocache.Uncompressed,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
closers: []io.Closer{s},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -276,7 +299,8 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
// We only record the base variant of the format on upload; we didn’t do anything with
// the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger
// reuse of any kind between the blob digest and the TOC digest.
uploadedCompressorName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}
}

Expand Down Expand Up @@ -336,32 +360,26 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
}
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorName)
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorBaseVariantName == "" || d.uploadedCompressorSpecificVariantName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded base: %q, uploaded specific: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorBaseVariantName, d.uploadedCompressorSpecificVariantName)
}
if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms.
// There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions,
// and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless.
//
// We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate
// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
// inconsistent data to be logged.
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorName,
})
}
if d.uploadedCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorBaseVariantName,
SpecificVariantCompressor: d.uploadedCompressorSpecificVariantName,
SpecificVariantAnnotations: d.uploadedAnnotations,
})
}
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
// If the source is already using some TOC-dependent variant, we either copied the
// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
// so record neither the variant name, nor the TOC digest.
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.srcCompressorBaseVariantName,
BaseVariantCompressor: d.srcCompressorBaseVariantName,
SpecificVariantCompressor: internalblobinfocache.UnknownCompression,
SpecificVariantAnnotations: nil,
})
}
return nil
Expand Down
31 changes: 22 additions & 9 deletions copy/single.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
"maps"
"reflect"
"slices"
"strings"
Expand Down Expand Up @@ -162,7 +163,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
if format == nil {
format = defaultCompressionFormat
}
if format.Name() == compression.ZstdChunked.Name() {
if format.Name() == compressiontypes.ZstdChunkedAlgorithmName {
if ic.requireCompressionFormatMatch {
return copySingleImageResult{}, errors.New("explicitly requested to combine zstd:chunked with encryption, which is not beneficial; use plain zstd instead")
}
Expand Down Expand Up @@ -888,21 +889,33 @@ func updatedBlobInfoFromReuse(inputInfo types.BlobInfo, reusedBlob private.Reuse
// Handling of compression, encryption, and the related MIME types and the like are all the responsibility
// of the generic code in this package.
res := types.BlobInfo{
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
Annotations: inputInfo.Annotations, // FIXME: This should remove zstd:chunked annotations (but those annotations being left with incorrect values should not break pulls)
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
// FIXME: This should remove zstd:chunked annotations IF the original was chunked and the new one isn’t
// (but those annotations being left with incorrect values should not break pulls).
Annotations: maps.Clone(inputInfo.Annotations),
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
CompressionOperation: reusedBlob.CompressionOperation,
CompressionAlgorithm: reusedBlob.CompressionAlgorithm,
CryptoOperation: inputInfo.CryptoOperation, // Expected to be unset anyway.
}
// The transport is only expected to fill CompressionOperation and CompressionAlgorithm
// if the blob was substituted; otherwise, fill it in based
// if the blob was substituted; otherwise, it is optional, and if not set, fill it in based
// on what we know from the srcInfos we were given.
if reusedBlob.Digest == inputInfo.Digest {
res.CompressionOperation = inputInfo.CompressionOperation
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
if res.CompressionOperation == types.PreserveOriginal {
res.CompressionOperation = inputInfo.CompressionOperation
}
if res.CompressionAlgorithm == nil {
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
}
}
if len(reusedBlob.CompressionAnnotations) != 0 {
if res.Annotations == nil {
res.Annotations = map[string]string{}
}
maps.Copy(res.Annotations, reusedBlob.CompressionAnnotations)
}
return res
}
Expand Down
30 changes: 25 additions & 5 deletions copy/single_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,42 @@ func TestUpdatedBlobInfoFromReuse(t *testing.T) {
},
{ // Reuse with substitution
reused: private.ReusedBlob{
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
CompressionAnnotations: map[string]string{"decompressed": "value"},
},
expected: types.BlobInfo{
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
URLs: nil,
Annotations: map[string]string{"test-annotation-2": "two"},
Annotations: map[string]string{"test-annotation-2": "two", "decompressed": "value"},
MediaType: imgspecv1.MediaTypeImageLayerGzip,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
// CryptoOperation is set to the zero value
},
},
{ // Reuse turning zstd into zstd:chunked
reused: private.ReusedBlob{
Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb",
Size: 51354364,
CompressionOperation: types.Compress,
CompressionAlgorithm: &compression.ZstdChunked,
CompressionAnnotations: map[string]string{"zstd-toc": "value"},
},
expected: types.BlobInfo{
Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb",
Size: 51354364,
URLs: nil,
Annotations: map[string]string{"test-annotation-2": "two", "zstd-toc": "value"},
MediaType: imgspecv1.MediaTypeImageLayerGzip,
CompressionOperation: types.Compress,
CompressionAlgorithm: &compression.ZstdChunked,
// CryptoOperation is set to the zero value
},
},
} {
res := updatedBlobInfoFromReuse(srcInfo, c.reused)
assert.Equal(t, c.expected, res, fmt.Sprintf("%#v", c.reused))
Expand Down
22 changes: 17 additions & 5 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return false, private.ReusedBlob{}, errors.New("Can not check for a blob with unknown digest")
}

originalCandidateKnownToBeMissing := false
if impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
// First, check whether the blob happens to already exist at the destination.
haveBlob, reusedInfo, err := d.tryReusingExactBlob(ctx, info, options.Cache)
Expand All @@ -341,9 +342,17 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
if haveBlob {
return true, reusedInfo, nil
}
originalCandidateKnownToBeMissing = true
} else {
logrus.Debugf("Ignoring exact blob match, compression %s does not match required %s or MIME types %#v",
optionalCompressionName(options.OriginalCompression), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
// We can get here with a blob detected to be zstd when the user wants a zstd:chunked.
// In that case we keep originalCandiateKnownToBeMissing = false, so that if we find
// a BIC entry for this blob, we do use that entry and return a zstd:chunked entry
// with the BIC’s annotations.
// This is not quite correct, it only works if the BIC also contains an acceptable _location_.
// Ideally, we could look up just the compression algorithm/annotations for info.digest,
// and use it even if no location candidate exists and the original dandidate is present.
}

// Then try reusing blobs from other locations.
Expand Down Expand Up @@ -387,7 +396,8 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
// for it in the current repo.
candidateRepo = reference.TrimNamed(d.ref.ref)
}
if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
if originalCandidateKnownToBeMissing &&
candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
logrus.Debug("... Already tried the primary destination")
continue
}
Expand Down Expand Up @@ -427,10 +437,12 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
options.Cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref))

return true, private.ReusedBlob{
Digest: candidate.Digest,
Size: size,
CompressionOperation: candidate.CompressionOperation,
CompressionAlgorithm: candidate.CompressionAlgorithm}, nil
Digest: candidate.Digest,
Size: size,
CompressionOperation: candidate.CompressionOperation,
CompressionAlgorithm: candidate.CompressionAlgorithm,
CompressionAnnotations: candidate.CompressionAnnotations,
}, nil
}

return false, private.ReusedBlob{}, nil
Expand Down
Loading

0 comments on commit 8c7c58c

Please sign in to comment.