-
Notifications
You must be signed in to change notification settings - Fork 276
Tool for extracting UtilityVM files from a container layer into a CIM #2423
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| //go:build windows | ||
| // +build windows | ||
|
|
||
| package main | ||
|
|
||
| import ( | ||
| "context" | ||
| "flag" | ||
| "fmt" | ||
| "os" | ||
| "time" | ||
|
|
||
| "github.com/Microsoft/hcsshim/pkg/extractuvm" | ||
| ) | ||
|
|
||
| func run() error { | ||
| var ( | ||
| layerTar string | ||
| destPath string | ||
| ) | ||
|
|
||
| flag.StringVar(&layerTar, "layer", "", "Path to the gzipped layer tar") | ||
| flag.StringVar(&destPath, "dest", "", "Path to the destination directory") | ||
| flag.Parse() | ||
|
|
||
| if layerTar == "" || destPath == "" { | ||
| flag.Usage() | ||
| return fmt.Errorf("both -layer and -dest flags are required") | ||
| } | ||
|
|
||
| // 5 minutes should be more than enough to extract all the files | ||
| ctx, cancelFunc := context.WithTimeout(context.Background(), 5*time.Minute) | ||
| defer cancelFunc() | ||
|
|
||
| _, err := extractuvm.MakeUtilityVMCIMFromTar(ctx, layerTar, destPath) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to create UVM CIM: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func main() { | ||
| if err := run(); err != nil { | ||
| fmt.Fprintf(os.Stderr, "%s\n", err) | ||
| os.Exit(1) | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,208 @@ | ||
| //go:build windows | ||
| // +build windows | ||
|
|
||
| package extractuvm | ||
|
|
||
| import ( | ||
| "archive/tar" | ||
| "compress/gzip" | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "io" | ||
| "log/slog" | ||
| "os" | ||
| "path/filepath" | ||
|
|
||
| "github.com/Microsoft/hcsshim/pkg/cimfs" | ||
| ) | ||
|
|
||
| const ( | ||
| LevelTrace = slog.Level(-8) | ||
| ) | ||
|
|
||
| func MakeUtilityVMCIMFromTar(ctx context.Context, tarPath, destPath string) (_ *cimfs.BlockCIM, err error) { | ||
| slog.InfoContext(ctx, "Extracting UtilityVM files from tar", "tarPath", tarPath, "destPath", destPath) | ||
|
|
||
| tarFile, err := os.Open(tarPath) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to open layer tar: %w", err) | ||
| } | ||
| defer tarFile.Close() | ||
|
|
||
| err = os.MkdirAll(destPath, 0755) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to create destination directory: %w", err) | ||
| } | ||
|
|
||
| uvmCIM := &cimfs.BlockCIM{ | ||
| Type: cimfs.BlockCIMTypeSingleFile, | ||
| BlockPath: filepath.Join(destPath, "boot.bcim"), | ||
| CimName: "boot.cim", | ||
| } | ||
|
|
||
| w, err := newUVMCIMWriter(uvmCIM) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to create block CIM writer: %w", err) | ||
| } | ||
| defer func() { | ||
| cErr := w.Close(ctx) | ||
| if err == nil { | ||
| err = cErr | ||
| } | ||
| }() | ||
|
|
||
| if err = extractUtilityVMFilesFromTar(ctx, tarFile, w); err != nil { | ||
| return nil, fmt.Errorf("failed to extract UVM layer: %w", err) | ||
| } | ||
| return uvmCIM, nil | ||
| } | ||
|
|
||
| // extractUtilityVMFilesFromTar writes all the files in the tar under the | ||
| // `UitilityVM/Files` directory into the CIM. For windows container image layer tarballs, | ||
| // there is complex web of hardlinks between the files under `Files` & | ||
| // `UtilityVM/Files`. To correctly handle this when extracting UtilityVM files this code | ||
| // makes following assumptions based on the way windows layer tarballs are currently | ||
| // structured: | ||
| // 1. If tar iteration comes across a file of type `TypeLink`, the target that this link points to MUST have already been iterated over. | ||
| // 2. When iterating over the tarball, `Files` directory tree always comes before the `UtilityVM/Files` directory. | ||
| // 3. There are hardlinks under `UtilityVM/Files` that point to files under `Files` but not vice versa. | ||
| // 4. Since this routine is supposed to be used on a base layer tarball, it doesn't expect any whiteout files in the tarball. | ||
| // 5. Files of type `TypeSymlink` are not generally used windows base layers and so the code errors out if it sees such files. Same is the case for files with alternate data streams. | ||
| // 6. There are no directories under `UtilityVM/Files` that are hardlinks to directories under `Files`. Only files can be hardlinks. | ||
| func extractUtilityVMFilesFromTar(ctx context.Context, tarFile *os.File, w *uvmCIMWriter) error { | ||
| gr, err := gzip.NewReader(tarFile) | ||
ambarve marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| return fmt.Errorf("failed to get gzip reader: %w", err) | ||
| } | ||
| defer gr.Close() | ||
|
|
||
| tr := tar.NewReader(gr) | ||
|
|
||
| // General approach: | ||
| // Iterate over each file in the tar one by one. If we see a file that is under | ||
| // `UtilityVM/Files`: If it is a standard file or a directory add it to the CIM | ||
| // directly. | ||
| // If it is a link, check the target of the link: | ||
| // - If the target is not under `UtilityVM/Files`, save it so that we can copy the | ||
| // target file under at the path of this link. | ||
| // - If the target is also under UtilityVM/Files save it for later addition (we | ||
| // can't add the link yet because the target itself could be another link, so we | ||
| // need to wait until all link targets are resolved and added to the CIM). | ||
|
|
||
| linksToAdd := []*pendingLink{} | ||
| // a map of all the files seen in the tar - this is used to resolve nested links | ||
| tarContents := make(map[string]*tar.Header) | ||
| // a map of all files that we need to copy inside the UtilityVM directory from the | ||
| // outside because there are hardlinks to it inside the UtilityVM directory. | ||
| // There could be multiple links under UtilityVM/Files that end up directly or | ||
| // indirectly pointing to the same target. So we may have to copy the same file at | ||
| // multiple locations. | ||
| // TODO (ambarve): avoid these multiple copies by only copying once and then | ||
| // adding hardlinks. | ||
| copies := make(map[string][]string) | ||
|
|
||
| for { | ||
| hdr, err := tr.Next() | ||
| if err != nil { | ||
| if errors.Is(err, io.EOF) { | ||
| break | ||
| } | ||
| return fmt.Errorf("tar read failed: %w", err) | ||
| } | ||
|
|
||
| if err = validateFileType(hdr); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| tarContents[hdr.Name] = hdr | ||
|
|
||
| if !hasUtilityVMFilesPrefix(hdr.Name) { | ||
| continue | ||
| } | ||
|
|
||
| // At this point we either have a standard file or a link file that is | ||
| // under the UtilityVM\Files directory. | ||
| if hdr.Typeflag == tar.TypeLink { | ||
| if !hasUtilityVMFilesPrefix(hdr.Linkname) { | ||
| // link points to a file outside the UtilityVM\Files | ||
| // directory we need to copy this file, but first resolve | ||
| // the link | ||
| resolvedTarget, err := resolveLink(tarContents, hdr.Linkname) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to resolve link's [%s] target [%s]: %w", hdr.Name, hdr.Linkname, err) | ||
| } | ||
| copies[resolvedTarget] = append(copies[resolvedTarget], hdr.Name) | ||
| slog.Log(ctx, LevelTrace, "adding to list of pending copies", "src", resolvedTarget, "dest", hdr.Name) | ||
| } else { | ||
| linksToAdd = append(linksToAdd, &pendingLink{ | ||
| name: hdr.Name, | ||
| target: hdr.Linkname, | ||
| }) | ||
| slog.Log(ctx, LevelTrace, "adding to list of pending links", "link", hdr.Name, "target", hdr.Linkname) | ||
| } | ||
| continue | ||
| } | ||
| if err = w.Add(hdr, tr, false); err != nil { | ||
| return fmt.Errorf("failed add UtilityVM standard file [%s]: %w", hdr.Name, err) | ||
| } | ||
| slog.Log(ctx, LevelTrace, "added standard file", "path", hdr.Name) | ||
| } | ||
| // close the current gzip reader before making a new one | ||
| if err = gr.Close(); err != nil { | ||
| return fmt.Errorf("failed to close gzip reader after first iteration: %w", err) | ||
| } | ||
|
|
||
| // reiterate tar and add copies | ||
| if _, err = tarFile.Seek(0, 0); err != nil { | ||
| return fmt.Errorf("failed to reset file offset: %w", err) | ||
| } | ||
|
|
||
| gr, err = gzip.NewReader(tarFile) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to get gzip reader: %w", err) | ||
| } | ||
| defer gr.Close() | ||
| tr = tar.NewReader(gr) | ||
|
|
||
| for { | ||
| hdr, err := tr.Next() | ||
| if err != nil { | ||
| if errors.Is(err, io.EOF) { | ||
| break | ||
| } | ||
| return fmt.Errorf("tar read failed: %w", err) | ||
| } | ||
|
|
||
| dsts, ok := copies[hdr.Name] | ||
| if !ok { | ||
| continue | ||
| } | ||
|
|
||
| if !hasFilesPrefix(hdr.Name) { | ||
| return fmt.Errorf("copy src file doesn't have expected prefix: [%s]", hdr.Name) | ||
| } | ||
|
|
||
| for i, dst := range dsts { | ||
| if i == 0 { | ||
| dHdr := *hdr | ||
| dHdr.Name = dst | ||
| // copy the first one, next will be links to the first | ||
| if err = w.Add(&dHdr, tr, true); err != nil { | ||
| return fmt.Errorf("failed to copy resolved link target [%s]: %w", hdr.Name, err) | ||
| } | ||
| } else { | ||
| if err = w.AddLink(dst, dsts[0], true); err != nil { | ||
| return fmt.Errorf("failed to add links to copied file [%s]: %w", dst, err) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| for _, pl := range linksToAdd { | ||
| if err = w.AddLink(pl.name, pl.target, false); err != nil { | ||
| return fmt.Errorf("failed to add link from [%s] to [%s]: %w", pl.name, pl.target, err) | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.