Skip to content

Commit

Permalink
Fix unpacking of filenames that contain UTF-8 characters
Browse files Browse the repository at this point in the history
Change from the C to C.UTF-8 locale, allowing libarchive to handle
filenames in UTF-8. We restrict the change to LC_CTYPE only, since
libarchive only needs the charset to be set.

See the libarchive website for a more complete description of the issue:

  libarchive/libarchive#587
  https://github.com/libarchive/libarchive/wiki/Filenames

Once the uncompress operation completes, we restore the original
LC_CTYPE to avoid side effects.

Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
  • Loading branch information
otavio authored and Jonathas-Conceicao committed Mar 3, 2021
1 parent 897721b commit f4f0668
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 0 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@

### Fixed

* Fix unpacking of filenames that contain UTF-8 characters. [#52]
* Fixed the build script so it enforces the use of `libarchive` 3.2.0 or newer.

[#52]: https://github.com/OSSystems/compress-tools-rs/pull/52

## [0.10.0] - 2021-02-11

### Changed
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
blocking = { version = "1.0.0", optional = true }
tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
libc = "0.2.86"

[features]
async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]
Expand Down
104 changes: 104 additions & 0 deletions src/ffi/locale.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (C) 2021 O.S. Systems Software LTDA
//
// SPDX-License-Identifier: MIT OR Apache-2.0

/// Change from the C to the system locale, allowing libarchive to handle
/// filenames in UTF-8. We restrict the change to LC_CTYPE only, since
/// libarchive only needs the charset to be set.
///
/// See the libarchive project for a more complete description of the issue:
///
/// <https://github.com/libarchive/libarchive/issues/587>
/// <https://github.com/libarchive/libarchive/wiki/Filenames>
pub(crate) use inner::UTF8LocaleGuard;

#[cfg(unix)]
mod inner {
    /// Scope guard that installs a UTF-8 capable `LC_CTYPE` locale on the
    /// calling thread when created and puts the previous locale back when
    /// dropped.
    pub(crate) struct UTF8LocaleGuard {
        /// Locale handle returned by `uselocale`, i.e. the one that was
        /// active on this thread before the guard was created.
        save: libc::locale_t,
        /// Locale object allocated by `newlocale`. It is owned by the guard
        /// and released in `Drop`; null when the allocation failed.
        utf8_locale: libc::locale_t,
    }

    impl UTF8LocaleGuard {
        /// Allocates a locale restricted to `LC_CTYPE` and makes it the
        /// current locale of the calling thread.
        ///
        /// If `newlocale` fails it returns a null handle; passing that to
        /// `uselocale` only queries the current locale (per POSIX), so the
        /// guard then leaves the thread locale untouched.
        pub(crate) fn new() -> Self {
            #[cfg(target_os = "linux")]
            let locale = b"\0";

            #[cfg(target_os = "macos")]
            let locale = b"UTF-8\0";

            // SAFETY: `locale` is a NUL-terminated byte string that outlives
            // the call, and a null base requests a freshly allocated locale.
            let utf8_locale = unsafe {
                libc::newlocale(
                    libc::LC_CTYPE_MASK,
                    locale.as_ptr() as *const libc::c_char,
                    std::ptr::null_mut(),
                )
            };

            // SAFETY: `utf8_locale` is either a valid handle from `newlocale`
            // or null, both of which `uselocale` accepts.
            let save = unsafe { libc::uselocale(utf8_locale) };

            Self { save, utf8_locale }
        }
    }

    impl Drop for UTF8LocaleGuard {
        /// Restores the locale saved in `new` and frees the locale object
        /// that the guard allocated.
        fn drop(&mut self) {
            // SAFETY: `save` is the handle `uselocale` returned in `new`.
            unsafe { libc::uselocale(self.save) };

            // The locale must only be released once it is no longer the
            // thread's current locale, hence after the restore above. Without
            // this call every guard leaked one locale object.
            if !self.utf8_locale.is_null() {
                // SAFETY: the handle came from `newlocale`, is not in use any
                // more and is freed exactly once.
                unsafe { libc::freelocale(self.utf8_locale) };
            }
        }
    }
}

#[cfg(windows)]
mod inner {
    use std::ffi::{CStr, CString};
    use std::os::raw::c_int;

    extern "C" {
        // Provided by the Microsoft C runtime.
        fn _configthreadlocale(arg1: c_int) -> c_int;
    }
    const _ENABLE_PER_THREAD_LOCALE: c_int = 1;

    /// Scope guard that switches `LC_CTYPE` to the `.UTF-8` locale with
    /// per-thread locale enabled, and reverts both settings when dropped.
    pub(crate) struct UTF8LocaleGuard {
        /// Copy of the `LC_CTYPE` locale name reported before the switch;
        /// `None` when `setlocale` returned a null pointer.
        save: Option<CString>,
        /// Value returned by `_configthreadlocale(0)` before the switch.
        save_thread_config: c_int,
    }

    impl UTF8LocaleGuard {
        /// Records the current `LC_CTYPE` locale and thread-locale
        /// configuration, then enables per-thread locale and selects
        /// `.UTF-8` for `LC_CTYPE`.
        pub(crate) fn new() -> Self {
            let utf8_name = b".UTF-8\0";

            // A null locale argument only queries the current setting.
            let current = unsafe { libc::setlocale(libc::LC_CTYPE, std::ptr::null()) };
            // Copy the name right away: the returned pointer refers to
            // storage owned by the C runtime.
            let save = if !current.is_null() {
                Some(unsafe { CStr::from_ptr(current) }.to_owned())
            } else {
                None
            };
            // NOTE(review): per the CRT docs, 0 reports the current setting
            // without changing it — confirm.
            let save_thread_config = unsafe { _configthreadlocale(0) };

            unsafe {
                _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
                libc::setlocale(
                    libc::LC_CTYPE,
                    CStr::from_bytes_with_nul_unchecked(utf8_name).as_ptr(),
                );
            }

            Self {
                save,
                save_thread_config,
            }
        }
    }

    impl Drop for UTF8LocaleGuard {
        /// Restores the saved `LC_CTYPE` locale (when one was captured) and
        /// then the saved thread-locale configuration.
        fn drop(&mut self) {
            if let Some(previous) = self.save.as_ref() {
                unsafe { libc::setlocale(libc::LC_CTYPE, previous.as_ptr()) };
            }

            unsafe { _configthreadlocale(self.save_thread_config) };
        }
    }
}
2 changes: 2 additions & 0 deletions src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
// SPDX-License-Identifier: MIT OR Apache-2.0

mod generated;
mod locale;

pub(crate) use crate::ffi::generated::*;
pub(crate) use locale::UTF8LocaleGuard;
5 changes: 5 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pub fn list_archive_files<R>(source: R) -> Result<Vec<String>>
where
R: Read + Seek,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
let mut file_list = Vec::new();
#[allow(clippy::vec_init_then_push)]
Expand Down Expand Up @@ -171,6 +172,7 @@ where
R: Read,
W: Write,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_archive(
WriteMode::Buffer,
source,
Expand Down Expand Up @@ -206,6 +208,7 @@ pub fn uncompress_archive<R>(source: R, dest: &Path, ownership: Ownership) -> Re
where
R: Read + Seek,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_archive(
WriteMode::Disk { ownership },
source,
Expand Down Expand Up @@ -273,6 +276,7 @@ where
R: Read + Seek,
W: Write,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
loop {
match ffi::archive_read_next_header(archive_reader, &mut entry) {
Expand Down Expand Up @@ -301,6 +305,7 @@ where
F: FnOnce(*mut ffi::archive, *mut ffi::archive, *mut ffi::archive_entry) -> Result<T>,
R: Read,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
unsafe {
let archive_entry: *mut ffi::archive_entry = std::ptr::null_mut();
let archive_reader = ffi::archive_read_new();
Expand Down
Binary file added tests/fixtures/utf8.tar
Binary file not shown.
15 changes: 15 additions & 0 deletions tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,21 @@ fn uncompress_to_dir_not_preserve_owner() {
);
}

#[test]
fn uncompress_to_dir_with_utf8_pathname() {
    // Extracts a fixture archive whose entry name contains non-ASCII
    // characters and checks that the entry shows up on disk under that name.
    let dir = tempfile::TempDir::new().expect("Failed to create the tmp directory");
    let mut source = std::fs::File::open("tests/fixtures/utf8.tar").unwrap();

    uncompress_archive(&mut source, dir.path(), Ownership::Ignore)
        .expect("Failed to uncompress the file");

    // `assert!` instead of `assert_eq!(.., true)`: same check, without the
    // redundant comparison against a boolean literal.
    assert!(
        dir.path().join("utf-8-file-name-őúíá").exists(),
        "the path doesn't exist"
    );
}

#[test]
fn uncompress_same_file_not_preserve_owner() {
uncompress_archive(
Expand Down

0 comments on commit f4f0668

Please sign in to comment.