Skip to content

Improve Windows path prefix parsing #94887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@
#![feature(exhaustive_patterns)]
#![feature(intra_doc_pointers)]
#![feature(lang_items)]
#![feature(let_chains)]
#![feature(linkage)]
#![feature(min_specialization)]
#![feature(must_not_suspend)]
Expand Down
42 changes: 20 additions & 22 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ pub enum Prefix<'a> {

/// Device namespace prefix, e.g., `\\.\COM42`.
///
/// Device namespace prefixes consist of `\\.\` immediately followed by the
/// device name.
/// Device namespace prefixes consist of `\\.\` (possibly using `/`
/// instead of `\`), immediately followed by the device name.
#[stable(feature = "rust1", since = "1.0.0")]
DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr),

Expand All @@ -193,7 +193,7 @@ impl<'a> Prefix<'a> {
fn len(&self) -> usize {
use self::Prefix::*;
fn os_str_len(s: &OsStr) -> usize {
os_str_as_u8_slice(s).len()
s.bytes().len()
}
match *self {
Verbatim(x) => 4 + os_str_len(x),
Expand Down Expand Up @@ -299,19 +299,17 @@ where
}
}

// See note at the top of this module to understand why these are used:
//
// These casts are safe as OsStr is internally a wrapper around [u8] on all
// platforms.
//
// Note that currently this relies on the special knowledge that libstd has;
// these types are single-element structs but are not marked repr(transparent)
// or repr(C) which would make these casts allowable outside std.
fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
unsafe { &*(s as *const OsStr as *const [u8]) }
}
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
// SAFETY: see the comment of `os_str_as_u8_slice`
// SAFETY: See note at the top of this module to understand why this and
// `OsStr::bytes` are used:
//
// This casts are safe as OsStr is internally a wrapper around [u8] on all
// platforms.
//
// Note that currently this relies on the special knowledge that libstd has;
// these types are single-element structs but are not marked
// repr(transparent) or repr(C) which would make these casts not allowable
// outside std.
unsafe { &*(s as *const [u8] as *const OsStr) }
}

Expand All @@ -332,15 +330,15 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool {

// basic workhorse for splitting stem and extension
fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
if os_str_as_u8_slice(file) == b".." {
if file.bytes() == b".." {
return (Some(file), None);
}

// The unsafety here stems from converting between &OsStr and &[u8]
// and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced
// only from ASCII-bounded slices of existing &OsStr values.
let mut iter = os_str_as_u8_slice(file).rsplitn(2, |b| *b == b'.');
let mut iter = file.bytes().rsplitn(2, |b| *b == b'.');
let after = iter.next();
let before = iter.next();
if before == Some(b"") {
Expand All @@ -351,7 +349,7 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
}

fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
let slice = os_str_as_u8_slice(file);
let slice = file.bytes();
if slice == b".." {
return (file, None);
}
Expand Down Expand Up @@ -1445,17 +1443,17 @@ impl PathBuf {
fn _set_extension(&mut self, extension: &OsStr) -> bool {
let file_stem = match self.file_stem() {
None => return false,
Some(f) => os_str_as_u8_slice(f),
Some(f) => f.bytes(),
};

// truncate until right after the file stem
let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
let start = os_str_as_u8_slice(&self.inner).as_ptr().addr();
let start = self.inner.bytes().as_ptr().addr();
let v = self.as_mut_vec();
v.truncate(end_file_stem.wrapping_sub(start));

// add the new extension, if any
let new = os_str_as_u8_slice(extension);
let new = extension.bytes();
if !new.is_empty() {
v.reserve_exact(new.len() + 1);
v.push(b'.');
Expand Down Expand Up @@ -1948,7 +1946,7 @@ impl Path {
}
// The following (private!) function reveals the byte encoding used for OsStr.
fn as_u8_slice(&self) -> &[u8] {
os_str_as_u8_slice(&self.inner)
self.inner.bytes()
}

/// Directly wraps a string slice as a `Path` slice.
Expand Down
12 changes: 6 additions & 6 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -971,15 +971,15 @@ pub fn test_decompositions_windows() {
file_prefix: None
);

t!("\\\\?\\C:/foo",
iter: ["\\\\?\\C:/foo"],
t!("\\\\?\\C:/foo/bar",
iter: ["\\\\?\\C:", "\\", "foo/bar"],
has_root: true,
is_absolute: true,
parent: None,
file_name: None,
file_stem: None,
parent: Some("\\\\?\\C:/"),
file_name: Some("foo/bar"),
file_stem: Some("foo/bar"),
extension: None,
file_prefix: None
file_prefix: Some("foo/bar")
);

t!("\\\\.\\foo\\bar",
Expand Down
4 changes: 4 additions & 0 deletions library/std/src/sys/windows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ where
{
// Start off with a stack buf but then spill over to the heap if we end up
// needing more space.
//
// This initial size also works around `GetFullPathNameW` returning
// incorrect size hints for some short paths:
// https://github.com/dylni/normpath/issues/5
let mut stack_buf = [0u16; 512];
let mut heap_buf = Vec::new();
unsafe {
Expand Down
120 changes: 91 additions & 29 deletions library/std/src/sys/windows/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,37 +50,101 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
path.into()
}

struct PrefixParser<'a, const LEN: usize> {
path: &'a OsStr,
prefix: [u8; LEN],
}

impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
#[inline]
fn get_prefix(path: &OsStr) -> [u8; LEN] {
let mut prefix = [0; LEN];
// SAFETY: Only ASCII characters are modified.
for (i, &ch) in path.bytes().iter().take(LEN).enumerate() {
prefix[i] = if ch == b'/' { b'\\' } else { ch };
}
prefix
}

fn new(path: &'a OsStr) -> Self {
Self { path, prefix: Self::get_prefix(path) }
}

fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
PrefixParserSlice {
path: self.path,
prefix: &self.prefix[..LEN.min(self.path.len())],
index: 0,
}
}
}

struct PrefixParserSlice<'a, 'b> {
path: &'a OsStr,
prefix: &'b [u8],
index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
fn strip_prefix(&self, prefix: &str) -> Option<Self> {
self.prefix[self.index..]
.starts_with(prefix.as_bytes())
.then(|| Self { index: self.index + prefix.len(), ..*self })
}

fn prefix_bytes(&self) -> &'a [u8] {
&self.path.bytes()[..self.index]
}

fn finish(self) -> &'a OsStr {
// SAFETY: The unsafety here stems from converting between &OsStr and
// &[u8] and back. This is safe to do because (1) we only look at ASCII
// contents of the encoding and (2) new &OsStr values are produced only
// from ASCII-bounded slices of existing &OsStr values.
unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) }
}
}

pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};

if let Some(path) = strip_prefix(path, r"\\") {
let parser = PrefixParser::<8>::new(path);
let parser = parser.as_slice();
if let Some(parser) = parser.strip_prefix(r"\\") {
// \\
if let Some(path) = strip_prefix(path, r"?\") {

// The meaning of verbatim paths can change when they use a different
// separator.
if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') {
// \\?\
if let Some(path) = strip_prefix(path, r"UNC\") {
if let Some(parser) = parser.strip_prefix(r"UNC\") {
// \\?\UNC\server\share

let path = parser.finish();
let (server, path) = parse_next_component(path, true);
let (share, _) = parse_next_component(path, true);

Some(VerbatimUNC(server, share))
} else {
let (prefix, _) = parse_next_component(path, true);
let path = parser.finish();

// in verbatim paths only recognize an exact drive prefix
if let Some(drive) = parse_drive_exact(prefix) {
if let Some(drive) = parse_drive_exact(path) {
// \\?\C:
Some(VerbatimDisk(drive))
} else {
// \\?\prefix
let (prefix, _) = parse_next_component(path, true);
Some(Verbatim(prefix))
}
}
} else if let Some(path) = strip_prefix(path, r".\") {
} else if let Some(parser) = parser.strip_prefix(r".\") {
// \\.\COM42
let path = parser.finish();
let (prefix, _) = parse_next_component(path, false);
Some(DeviceNS(prefix))
} else {
let path = parser.finish();
let (server, path) = parse_next_component(path, false);
let (share, _) = parse_next_component(path, false);

Expand All @@ -102,31 +166,26 @@ pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
}

// Parses a drive prefix, e.g. "C:" and "C:\whatever"
fn parse_drive(prefix: &OsStr) -> Option<u8> {
fn parse_drive(path: &OsStr) -> Option<u8> {
// In most DOS systems, it is not possible to have more than 26 drive letters.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
fn is_valid_drive_letter(drive: &u8) -> bool {
drive.is_ascii_alphabetic()
}

match prefix.bytes() {
match path.bytes() {
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
_ => None,
}
}

// Parses a drive prefix exactly, e.g. "C:"
fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
// only parse two bytes: the drive letter and the drive separator
if prefix.len() == 2 { parse_drive(prefix) } else { None }
}

fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
// `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
// is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
match path.bytes().strip_prefix(prefix.as_bytes()) {
Some(path) => unsafe { Some(bytes_as_os_str(path)) },
None => None,
if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
parse_drive(path)
} else {
None
}
}

Expand Down Expand Up @@ -219,15 +278,7 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
// SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
// `lpfilename` is a pointer to a null terminated string that is not
// invalidated until after `GetFullPathNameW` returns successfully.
|buffer, size| unsafe {
// While the docs for `GetFullPathNameW` have the standard note
// about needing a `\\?\` path for a long lpfilename, this does not
// appear to be true in practice.
// See:
// https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths
// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut())
},
|buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
|mut absolute| {
path.clear();

Expand Down Expand Up @@ -263,9 +314,20 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {

/// Make a Windows path absolute.
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
if path.as_os_str().bytes().starts_with(br"\\?\") {
return Ok(path.into());
let path = path.as_os_str();
let prefix = parse_prefix(path);
// Verbatim paths should not be modified.
if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
// NULs in verbatim paths are rejected for consistency.
if path.bytes().contains(&0) {
return Err(io::const_io_error!(
io::ErrorKind::InvalidInput,
"strings passed to WinAPI cannot contain NULs",
));
}
return Ok(path.to_owned().into());
}

let path = to_u16s(path)?;
let lpfilename = path.as_ptr();
fill_utf16_buf(
Expand Down
20 changes: 20 additions & 0 deletions library/std/src/sys/windows/path/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,23 @@ fn verbatim() {
// A path that contains null is not a valid path.
assert!(maybe_verbatim(Path::new("\0")).is_err());
}

fn parse_prefix(path: &str) -> Option<Prefix<'_>> {
super::parse_prefix(OsStr::new(path))
}

#[test]
fn test_parse_prefix_verbatim() {
let prefix = Some(Prefix::VerbatimDisk(b'C'));
assert_eq!(prefix, parse_prefix(r"\\?\C:/windows/system32/notepad.exe"));
assert_eq!(prefix, parse_prefix(r"\\?\C:\windows\system32\notepad.exe"));
}

#[test]
fn test_parse_prefix_verbatim_device() {
let prefix = Some(Prefix::UNC(OsStr::new("?"), OsStr::new("C:")));
assert_eq!(prefix, parse_prefix(r"//?/C:/windows/system32/notepad.exe"));
assert_eq!(prefix, parse_prefix(r"//?/C:\windows\system32\notepad.exe"));
assert_eq!(prefix, parse_prefix(r"/\?\C:\windows\system32\notepad.exe"));
assert_eq!(prefix, parse_prefix(r"\\?/C:\windows\system32\notepad.exe"));
}