-
Notifications
You must be signed in to change notification settings - Fork 182
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New RemoveAll function intended to be a replacement for os.RemoveAll usage and be tailored to Windows scenarios. Most importantly, this function works with []uint16 instead of string. This allows it to properly delete Windows filesystem trees that contain files with invalid UTF-16 paths. Signed-off-by: Kevin Parsons <kevpar@microsoft.com>
- Loading branch information
Showing
5 changed files
with
502 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
package fs | ||
|
||
import ( | ||
"errors" | ||
"io/fs" | ||
"unsafe" | ||
|
||
"golang.org/x/sys/windows" | ||
) | ||
|
||
// We want to use findFirstFileExW since it allows us to avoid the shortname lookup. | ||
// Package golang.org/x/sys/windows only defines FindFirstFile, so we define our own here. | ||
// We could use its findNextFileW definition, except that it also has an inaccurate version of Win32finddata[1]. | ||
// Therefore we define our own versions of both, as well as our own findData struct. | ||
// | ||
// [1] This inaccurate version is still usable, as the name and alternate name fields are one character short, | ||
// but the original value is gauranteed to be null terminated, so you just lose the null terminator in the worst case. | ||
// However, this is still annoying to deal with, as you either must find the null in the array, or treat the full array | ||
// as a null terminated string. | ||
|
||
//sys findFirstFileExW(pattern *uint16, infoLevel uint32, data *findData, searchOp uint32, searchFilter unsafe.Pointer, flags uint32) (h windows.Handle, err error) [failretval==windows.InvalidHandle] = kernel32.FindFirstFileExW | ||
//sys findNextFileW(findHandle windows.Handle, data *findData) (err error) = kernel32.FindNextFileW | ||
|
||
const ( | ||
// Part of FINDEX_INFO_LEVELS. | ||
// | ||
// https://learn.microsoft.com/en-us/windows/win32/api/minwinbase/ne-minwinbase-findex_info_levels | ||
findExInfoBasic = 1 | ||
|
||
// Part of FINDEX_SEARCH_OPS. | ||
// | ||
// https://learn.microsoft.com/en-us/windows/win32/api/minwinbase/ne-minwinbase-findex_search_ops | ||
findExSearchNameMatch = 0 | ||
|
||
// Indicates the reparse point is a name surrogate, which means it refers to another filesystem entry. | ||
// This is used for things like symlinks and junction (mount) points. | ||
// | ||
// https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-point-tags#tag-contents | ||
reparseTagNameSurrogate = 0x20000000 | ||
) | ||
|
||
// findData represents Windows's WIN32_FIND_DATAW type. | ||
// The obsolete items at the end of the struct in the docs are not actually present, except on Mac. | ||
// | ||
// https://learn.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-win32_find_dataw | ||
type findData struct { | ||
attributes uint32 | ||
creationTime windows.Filetime | ||
accessTime windows.Filetime | ||
writeTime windows.Filetime | ||
fileSizeHigh uint32 | ||
fileSizeLow uint32 | ||
reserved0 uint32 // Holds reparse point tag when file is a reparse point. | ||
reserved1 uint32 | ||
name [windows.MAX_PATH]uint16 | ||
alternateName [14]uint16 | ||
} | ||
|
||
// fileAttributeTagInfo represents Windows's FILE_ATTRIBUTE_TAG_INFO type. | ||
// | ||
// https://learn.microsoft.com/en-us/windows/win32/api/winbase/ns-winbase-file_attribute_tag_infof | ||
type fileAttributeTagInfo struct { | ||
attributes uint32 | ||
tag uint32 | ||
} | ||
|
||
var ( | ||
// Mockable for testing. | ||
removeDirectory = windows.RemoveDirectory | ||
) | ||
|
||
// RemoveAll attempts to be a Windows-specific replacement for os.RemoveAll. | ||
// Specifically, it treats a filesystem path as a []uint16 rather than a string. This allows it to work in some cases | ||
// where a string-based API would not, such as when a filesystem name is not valid UTF-16 (e.g. low surrogates without preceding high surrogates). | ||
// | ||
// RemoveAll handles some dynamic changes to the directory tree while we are deleting it. For instance, | ||
// if a directory is not empty, we delete its children first, and then try again. If new items are added to the | ||
// directory while we do that, we will again recurse and delete them, and keep trying. This matches the behavior of | ||
// os.RemoveAll. | ||
// However, we don't attempt to handle every dynamic case, for instance: | ||
// - If a file has FILE_ATTRIBUTE_READONLY re-set on it after we clear it, we will fail. | ||
// - If an entry is changed from file to directory after we query its attributes, we will fail. | ||
func RemoveAll(path []uint16) error { | ||
attrs, reparseTag, err := getFileInfo(path) | ||
if err == windows.ERROR_FILE_NOT_FOUND { | ||
return nil | ||
} else if err != nil { | ||
return err | ||
} | ||
return removeAll(path, attrs, reparseTag) | ||
} | ||
|
||
// removeAll is the lower-level routine for recursively deleting a file system entry. | ||
// The attributes and reparse point of the top-level item to be deleted must have already been queried and are supplied as an argument. | ||
func removeAll(path []uint16, attrs uint32, reparseTag uint32) error { | ||
if attrs&windows.FILE_ATTRIBUTE_DIRECTORY == 0 { | ||
// File | ||
if attrs&windows.FILE_ATTRIBUTE_READONLY != 0 { | ||
// Read-only flag prevents deletion, so un-set it first. | ||
if err := windows.SetFileAttributes(terminate(path), attrs&^windows.FILE_ATTRIBUTE_READONLY); err == windows.ERROR_FILE_NOT_FOUND { | ||
return nil | ||
} else if err != nil { | ||
return pathErr("SetFileAttributes", path, err) | ||
} | ||
} | ||
if err := windows.DeleteFile(terminate(path)); err == windows.ERROR_FILE_NOT_FOUND { | ||
return nil | ||
} else if err != nil { | ||
return pathErr("DeleteFile", path, err) | ||
} | ||
} else { | ||
// Directory | ||
for { | ||
// First, try to delete the directory. This will only work if it's empty. | ||
// If that fails then enumerate the entries and delete them first. | ||
if err := removeDirectory(terminate(path)); err == nil || err == windows.ERROR_FILE_NOT_FOUND { | ||
return nil | ||
} else if err != windows.ERROR_DIR_NOT_EMPTY || (attrs&windows.FILE_ATTRIBUTE_REPARSE_POINT != 0 && reparseTag&reparseTagNameSurrogate != 0) { | ||
// We either failed for some reason other than the directory not being empty, or because the directory is a name surrogate reparse point. | ||
// We don't want to recurse into a name surrogate (e.g. symlink) because we will end up deleting stuff elsewhere on the system. | ||
// In this case there's nothing else to do for this entry, so just return an error. | ||
return pathErr("RemoveDirectory", path, err) | ||
} | ||
// Wrapping in an anonymous function so that the deferred FindClose call is scoped properly. | ||
// Iterate the directory and remove all children by recursing into removeAll. | ||
if err := func() error { | ||
var fd findData | ||
// findFirstFileExW allows us to avoid the shortname lookup. See comment at top of file for details. | ||
pattern := join(path, []uint16{'*'}) | ||
find, err := findFirstFileExW(terminate(pattern), findExInfoBasic, &fd, findExSearchNameMatch, nil, 0) | ||
if err == windows.ERROR_FILE_NOT_FOUND { | ||
// No children is weird, because we should always have "." and "..". | ||
// If we do hit this, just continue to the next deletion attempt. | ||
return nil | ||
} else if err != nil { | ||
return pathErr("FindFirstFileEx", pattern, err) | ||
} | ||
defer windows.FindClose(find) | ||
for { | ||
var child []uint16 | ||
child, err = truncAtNull(fd.name[:]) | ||
if err != nil { | ||
return err | ||
} | ||
if !equal(child, []uint16{'.'}) && !equal(child, []uint16{'.', '.'}) { // Ignore "." and ".." | ||
if err := removeAll(join(path, child), fd.attributes, fd.reserved0); err != nil { | ||
return err | ||
} | ||
} | ||
err = findNextFileW(find, &fd) | ||
if err == windows.ERROR_NO_MORE_FILES { | ||
break | ||
} else if err != nil { | ||
return pathErr("FindNextFile", path, err) | ||
} | ||
} | ||
return nil | ||
}(); err != nil { | ||
return err | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func getFileInfo(path []uint16) (uint32, uint32, error) { | ||
h, err := windows.CreateFile(terminate(path), 0, 0, nil, windows.OPEN_EXISTING, windows.FILE_OPEN_REPARSE_POINT|windows.FILE_FLAG_BACKUP_SEMANTICS, 0) | ||
if err != nil { | ||
return 0, 0, pathErr("CreateFile", path, err) | ||
} | ||
defer windows.CloseHandle(h) | ||
var ti fileAttributeTagInfo | ||
if err = windows.GetFileInformationByHandleEx(h, windows.FileAttributeTagInfo, (*byte)(unsafe.Pointer(&ti)), uint32(unsafe.Sizeof(ti))); err != nil { | ||
return 0, 0, pathErr("GetFileInformationByHandleEx", path, err) | ||
} | ||
return ti.attributes, ti.tag, nil | ||
} | ||
|
||
func equal(v1, v2 []uint16) bool { | ||
if len(v1) != len(v2) { | ||
return false | ||
} | ||
for i := range v1 { | ||
if v1[i] != v2[i] { | ||
return false | ||
} | ||
} | ||
return true | ||
} | ||
|
||
func join(parent, child []uint16) []uint16 { | ||
return append(append(parent, '\\'), child...) | ||
} | ||
|
||
func pathErr(op string, path []uint16, err error) error { | ||
return &fs.PathError{Op: op, Path: windows.UTF16ToString(path), Err: err} | ||
} | ||
|
||
// terminate takes a []uint16 and returns a null-terminated *uint16. | ||
func terminate(path []uint16) *uint16 { | ||
return &append(path, '\u0000')[0] | ||
} | ||
|
||
// truncAtNull searches the input for a null terminator and returns a slice | ||
// up to that point. It returns an error if there is no null terminator in the input. | ||
func truncAtNull(path []uint16) ([]uint16, error) { | ||
for i, u := range path { | ||
if u == '\u0000' { | ||
return path[:i], nil | ||
} | ||
} | ||
return nil, errors.New("path is not null terminated") | ||
} |
Oops, something went wrong.