Skip to content

Commit 83e54d3

Browse files
committed
Fix unpacking of filenames that contain UTF-8 characters
Change from the C to the C.UTF-8 locale, allowing libarchive to handle filenames in UTF-8. We restrict the change to LC_CTYPE only, since libarchive only needs the charset to be set. See the libarchive website for a more complete description of the issue: libarchive/libarchive#587 https://github.com/libarchive/libarchive/wiki/Filenames Once the uncompress operation completes, we restore the original LC_CTYPE to avoid side effects. Signed-off-by: Otavio Salvador <otavio@ossystems.com.br>
1 parent dbed3fe commit 83e54d3

File tree

7 files changed

+130
-0
lines changed

7 files changed

+130
-0
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66

77
### Fixed
88

9+
* Fix unpacking of filenames that contain UTF-8 characters. [#52]
910
* Fixed the build script so it enforces the use of `libarchive` 3.2.0 or newer.
1011

12+
[#52]: https://github.com/OSSystems/compress-tools-rs/pull/52
13+
1114
## [0.10.0] - 2021-02-11
1215

1316
### Changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
3333
blocking = { version = "1.0.0", optional = true }
3434
tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
3535
tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
36+
libc = "0.2.86"
3637

3738
[features]
3839
async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]

src/ffi/locale.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Copyright (C) 2021 O.S. Systems Software LTDA
2+
//
3+
// SPDX-License-Identifier: MIT OR Apache-2.0
4+
5+
/// Change from the C to system locale, allowing libarchive to handle filenames
6+
/// in UTF-8. We restrict the change to LC_CTYPE only, since libarchive only needs
7+
/// the charset to be set.
8+
///
9+
/// See the libarchive website for a more complete description of the issue:
10+
///
11+
/// https://github.com/libarchive/libarchive/issues/587
12+
/// https://github.com/libarchive/libarchive/wiki/Filenames
13+
pub(crate) use inner::UTF8LocaleGuard;
14+
15+
#[cfg(unix)]
16+
mod inner {
17+
pub(crate) struct UTF8LocaleGuard {
18+
save: libc::locale_t,
19+
}
20+
21+
impl UTF8LocaleGuard {
22+
pub(crate) fn new() -> Self {
23+
#[cfg(target_os = "linux")]
24+
let locale = b"\0";
25+
26+
#[cfg(target_os = "macos")]
27+
let locale = b"UTF-8\0";
28+
29+
let utf8_locale = unsafe {
30+
libc::newlocale(
31+
libc::LC_CTYPE_MASK,
32+
locale.as_ptr() as *const libc::c_char,
33+
std::ptr::null_mut(),
34+
)
35+
};
36+
37+
let save = unsafe { libc::uselocale(utf8_locale) };
38+
39+
Self { save }
40+
}
41+
}
42+
43+
impl Drop for UTF8LocaleGuard {
44+
fn drop(&mut self) {
45+
unsafe { libc::uselocale(self.save) };
46+
}
47+
}
48+
}
49+
50+
#[cfg(windows)]
mod inner {
    use std::ffi::{CStr, CString};
    use std::os::raw::{c_char, c_int};

    extern "C" {
        // CRT routine that queries or changes the per-thread locale mode.
        fn _configthreadlocale(arg1: c_int) -> c_int;
    }
    const _ENABLE_PER_THREAD_LOCALE: c_int = 1;

    /// Guard that switches `LC_CTYPE` to `.UTF-8` with per-thread locale mode
    /// enabled, and puts back the saved locale string and thread-locale mode
    /// when dropped.
    pub(crate) struct UTF8LocaleGuard {
        /// Copy of the `LC_CTYPE` locale name reported before the switch, or
        /// `None` when `setlocale` returned a null pointer.
        save: Option<CString>,
        /// Value returned by `_configthreadlocale(0)` before the switch.
        save_thread_config: c_int,
    }

    impl UTF8LocaleGuard {
        /// Records the current locale state, then installs the `.UTF-8`
        /// `LC_CTYPE` locale with per-thread locale mode enabled.
        pub(crate) fn new() -> Self {
            const UTF8_LOCALE: &[u8] = b".UTF-8\0";

            // Passing a null locale only queries the current setting.
            let previous = unsafe { libc::setlocale(libc::LC_CTYPE, std::ptr::null()) };
            let save = if previous.is_null() {
                None
            } else {
                // Copy the name right away: the buffer belongs to the CRT.
                Some(unsafe { CStr::from_ptr(previous) }.to_owned())
            };
            let save_thread_config = unsafe { _configthreadlocale(0) };

            unsafe {
                _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
                libc::setlocale(libc::LC_CTYPE, UTF8_LOCALE.as_ptr() as *const c_char)
            };

            Self {
                save,
                save_thread_config,
            }
        }
    }

    impl Drop for UTF8LocaleGuard {
        fn drop(&mut self) {
            // Restore the locale name first, then the thread-locale mode.
            if let Some(previous) = self.save.as_deref() {
                unsafe { libc::setlocale(libc::LC_CTYPE, previous.as_ptr()) };
            }

            unsafe {
                _configthreadlocale(self.save_thread_config);
            }
        }
    }
}

src/ffi/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@
33
// SPDX-License-Identifier: MIT OR Apache-2.0
44

55
mod generated;
6+
mod locale;
67

78
pub(crate) use crate::ffi::generated::*;
9+
pub(crate) use locale::UTF8LocaleGuard;

src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ pub fn list_archive_files<R>(source: R) -> Result<Vec<String>>
116116
where
117117
R: Read + Seek,
118118
{
119+
let _utf8_guard = ffi::UTF8LocaleGuard::new();
119120
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
120121
let mut file_list = Vec::new();
121122
#[allow(clippy::vec_init_then_push)]
@@ -171,6 +172,7 @@ where
171172
R: Read,
172173
W: Write,
173174
{
175+
let _utf8_guard = ffi::UTF8LocaleGuard::new();
174176
run_with_archive(
175177
WriteMode::Buffer,
176178
source,
@@ -206,6 +208,7 @@ pub fn uncompress_archive<R>(source: R, dest: &Path, ownership: Ownership) -> Re
206208
where
207209
R: Read + Seek,
208210
{
211+
let _utf8_guard = ffi::UTF8LocaleGuard::new();
209212
run_with_archive(
210213
WriteMode::Disk { ownership },
211214
source,
@@ -273,6 +276,7 @@ where
273276
R: Read + Seek,
274277
W: Write,
275278
{
279+
let _utf8_guard = ffi::UTF8LocaleGuard::new();
276280
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
277281
loop {
278282
match ffi::archive_read_next_header(archive_reader, &mut entry) {
@@ -301,6 +305,7 @@ where
301305
F: FnOnce(*mut ffi::archive, *mut ffi::archive, *mut ffi::archive_entry) -> Result<T>,
302306
R: Read,
303307
{
308+
let _utf8_guard = ffi::UTF8LocaleGuard::new();
304309
unsafe {
305310
let archive_entry: *mut ffi::archive_entry = std::ptr::null_mut();
306311
let archive_reader = ffi::archive_read_new();

tests/fixtures/utf8.tar

10 KB
Binary file not shown.

tests/integration_test.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,21 @@ fn uncompress_to_dir_not_preserve_owner() {
309309
);
310310
}
311311

312+
#[test]
fn uncompress_to_dir_with_utf8_pathname() {
    // Extract the fixture into a fresh temporary directory.
    let target_dir = tempfile::TempDir::new().expect("Failed to create the tmp directory");
    let mut archive = std::fs::File::open("tests/fixtures/utf8.tar").unwrap();

    uncompress_archive(&mut archive, target_dir.path(), Ownership::Ignore)
        .expect("Failed to uncompress the file");

    // The entry with non-ASCII characters must exist under its UTF-8 name.
    let extracted = target_dir.path().join("utf-8-file-name-őúíá");
    assert_eq!(extracted.exists(), true, "the path doesn't exist");
}
326+
312327
#[test]
313328
fn uncompress_same_file_not_preserve_owner() {
314329
uncompress_archive(

0 commit comments

Comments
 (0)