Skip to content

Commit 56b56b4

Browse files
brianpane authored and folkertdev committed
Implement gzseek for writes
1 parent ff267d0 commit 56b56b4

File tree

2 files changed

+238
-29
lines changed

2 files changed

+238
-29
lines changed

libz-rs-sys/src/gz.rs

Lines changed: 50 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -640,14 +640,13 @@ pub unsafe extern "C-unwind" fn gzclose_w(file: gzFile) -> c_int {
640640
return Z_STREAM_ERROR;
641641
}
642642

643-
/* FIXME Uncomment this when seek support is implemented
644643
// Check for a pending seek request
645644
if state.seek {
646645
state.seek = false;
647-
gz_zero(state, state.skip);
648-
ret = state.err;
646+
if gz_zero(state, state.skip as _).is_err() {
647+
ret = state.err;
648+
}
649649
}
650-
*/
651650

652651
// Compress (if not in direct mode) and output any data left in the input buffer.
653652
if gz_comp(state, Z_FINISH).is_err() {
@@ -1134,7 +1133,7 @@ macro_rules! gt_off {
11341133
// - `Ok` on success.
11351134
// - `Err` on error.
11361135
fn gz_skip(state: &mut GzState, mut len: i64) -> Result<(), ()> {
1137-
/* skip over len bytes or reach end-of-file, whichever comes first */
1136+
// Skip over len bytes or reach end-of-file, whichever comes first.
11381137
while len != 0 {
11391138
// Skip over whatever is in output buffer.
11401139
if state.have != 0 {
@@ -1586,14 +1585,12 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
15861585
return 0;
15871586
}
15881587

1589-
/* FIXME uncomment once seek support is implemented
15901588
if state.seek {
1591-
state.seek = 0;
1592-
if gz_zero(state, state.skip) == -1 {
1589+
state.seek = false;
1590+
if gz_zero(state, state.skip as _).is_err() {
15931591
return 0;
15941592
}
15951593
}
1596-
*/
15971594

15981595
let put = len as c_int;
15991596

@@ -1651,6 +1648,44 @@ unsafe fn gz_write(state: &mut GzState, mut buf: *const c_void, mut len: usize)
16511648
put
16521649
}
16531650

1651+
// Compress `len` zero bytes to the output, advancing `state.pos` by each chunk
// before it is handed to `gz_comp`.
1652+
//
1653+
// # Returns
1654+
//
1655+
// - `Ok` on success.
1656+
// - `Err` if any call to `gz_comp` fails.
1657+
fn gz_zero(state: &mut GzState, mut len: usize) -> Result<(), ()> {
1658+
// Consume whatever is left in the input buffer.
1659+
if state.stream.avail_in != 0 && gz_comp(state, Z_NO_FLUSH).is_err() {
1660+
return Err(());
1661+
}
1662+
1663+
// Compress `len` zeros, at most `state.in_size` bytes per `gz_comp` call.
1664+
let mut first = true;
1665+
while len != 0 {
1666+
let n = cmp::min(state.in_size, len);
// Only the first chunk is zero-filled; later iterations resubmit the same buffer.
// NOTE(review): this presumably relies on `gz_comp` leaving the buffer contents
// intact -- confirm.
1667+
if first {
1668+
// Safety: `state.input` is non-null here, either because it was initialized
1669+
// before this function was called (enabling the `state.stream.avail_in != 0`
1670+
// case in the check above) or because the call to `gz_comp` initialized it.
1671+
// All initialization paths in this module ensure that, when `state.input` is
1672+
// non-null, it points to `state.in_size` bytes of writable memory. Here we
1673+
// are writing `n` bytes, where `n` is initialized above to be <= `state.in_size`.
// NOTE(review): when `state.stream.avail_in == 0`, the `&&` in the check above
// short-circuits and `gz_comp` is not called before this write, so the non-null
// claim then depends on the caller having initialized `state.input` -- confirm.
1674+
unsafe { state.input.write_bytes(0u8, n) };
1675+
first = false;
1676+
}
1677+
state.stream.avail_in = n as _;
1678+
state.stream.next_in = state.input;
1679+
state.pos += n as i64;
1680+
if gz_comp(state, Z_NO_FLUSH).is_err() {
1681+
return Err(());
1682+
}
1683+
len -= n;
1684+
}
1685+
1686+
Ok(())
1687+
}
1688+
16541689
// Initialize `state` for writing a gzip file. Mark initialization by setting
16551690
// `state.input` to non-null.
16561691
//
@@ -1845,15 +1880,13 @@ pub unsafe extern "C-unwind" fn gzflush(file: gzFile, flush: c_int) -> c_int {
18451880
return Z_STREAM_ERROR;
18461881
}
18471882

1848-
/* FIXME: uncomment this when seek support is implemented
18491883
// Check for seek request.
18501884
if state.seek {
18511885
state.seek = false;
1852-
if gz_zero(state, state.skip) == -1 {
1886+
if gz_zero(state, state.skip as _).is_err() {
18531887
return state.err;
18541888
}
18551889
}
1856-
*/
18571890

18581891
// Compress remaining data with requested flush.
18591892
let _ = gz_comp(state, flush);
@@ -1920,7 +1953,7 @@ pub unsafe extern "C-unwind" fn gzoffset(file: gzFile) -> z_off_t {
19201953
return -1;
19211954
}
19221955

1923-
/* compute and return effective offset in file */
1956+
// Compute and return effective offset in file.
19241957
let offset = unsafe { libc::lseek(state.fd, 0, SEEK_CUR) };
19251958
if offset == -1 {
19261959
return -1;
@@ -1954,15 +1987,13 @@ pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
19541987
return -1;
19551988
}
19561989

1957-
/* FIXME: Uncomment when seek support is implemented.
19581990
// Check for seek request.
19591991
if state.seek {
19601992
state.seek = false;
1961-
if gz_zero(state, state.skip) == -1 {
1993+
if gz_zero(state, state.skip as _).is_err() {
19621994
return -1;
19631995
}
19641996
}
1965-
*/
19661997

19671998
// Try writing to input buffer for speed (state.input == null if buffer not initialized).
19681999
if !state.input.is_null() {
@@ -2135,15 +2166,13 @@ pub unsafe extern "C-unwind" fn gzungetc(c: c_int, file: gzFile) -> c_int {
21352166
let _ = unsafe { gz_look(state) };
21362167
}
21372168

2138-
/* FIXME uncomment when seek support is implemented.
21392169
// Process a skip request.
21402170
if state.seek {
21412171
state.seek = false;
2142-
if gz_skip(state, state.skip) == -1 {
2172+
if gz_skip(state, state.skip).is_err() {
21432173
return -1;
21442174
}
21452175
}
2146-
*/
21472176

21482177
// If output buffer empty, put byte at end (allows more pushing).
21492178
if state.have == 0 {
@@ -2354,15 +2383,13 @@ pub unsafe extern "C-unwind" fn gzsetparams(file: gzFile, level: c_int, strategy
23542383
return Z_OK;
23552384
}
23562385

2357-
/* FIXME: uncomment when seek support is implemented
23582386
// Check for seek request.
23592387
if state.seek {
2360-
state.seek = false`;
2361-
if gz_zero(state, state.skip) == -1 {
2388+
state.seek = false;
2389+
if gz_zero(state, state.skip as _).is_err() {
23622390
return state.err;
23632391
}
23642392
}
2365-
*/
23662393

23672394
// Change compression parameters for subsequent input.
23682395
if !state.input.is_null() {

test-libz-rs-sys/src/gz.rs

Lines changed: 188 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,21 +1159,51 @@ fn gzungetc_basic() {
11591159

11601160
// gzread should yield the remaining 10 bytes of uncompressed content from the file,
11611161
// preceded by the 6 bytes we just pushed with gzungetc, for a total of 16 bytes.
1162-
const EXPECTED: &[u8] = b"123456\nfor tests";
1162+
const EXPECTED1: &[u8] = b"123456\nfor tests";
11631163
// Read more than expected to make sure there's no other output following it.
1164-
let mut buf = [0u8; EXPECTED.len() + 1];
1164+
let mut buf = [0u8; EXPECTED1.len() + 1];
11651165
assert_eq!(
11661166
unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) },
1167-
EXPECTED.len() as _
1167+
EXPECTED1.len() as _
11681168
);
1169-
assert_eq!(&buf[..EXPECTED.len()], EXPECTED);
1169+
assert_eq!(&buf[..EXPECTED1.len()], EXPECTED1);
11701170

11711171
// The 16-byte output buffer is now empty. Call gzungetc 17 times. The first
1172-
// 16 calls should succeed, and the last one should fail.
1172+
// 16 calls should succeed, and the last one should fail and set an error.
1173+
let mut err = Z_OK;
1174+
assert!(!unsafe { gzerror(file, &mut err) }.is_null());
1175+
assert_eq!(err, Z_OK);
11731176
for _ in 0..16 {
11741177
assert_eq!(unsafe { gzungetc('-' as c_int, file) }, '-' as c_int);
11751178
}
11761179
assert_eq!(unsafe { gzungetc('-' as c_int, file) }, -1);
1180+
assert!(!unsafe { gzerror(file, &mut err) }.is_null());
1181+
assert_eq!(err, Z_DATA_ERROR);
1182+
1183+
// Clear the error state, rewind to the start of the file, and read some data
1184+
// to refill the output buffer.
1185+
unsafe { gzclearerr(file) };
1186+
unsafe { gzrewind(file) };
1187+
assert!(!unsafe { gzerror(file, &mut err) }.is_null());
1188+
assert_eq!(err, Z_OK);
1189+
assert_eq!(unsafe { gzgetc(file) }, 'g' as c_int);
1190+
assert_eq!(unsafe { gzgetc(file) }, 'z' as c_int);
1191+
1192+
// Push a character back into the output buffer with gzungetc, issue a seek
1193+
// request to move to another part of the output stream, and then do another
1194+
// gzungetc. gzread should then return the character pushed in the second
1195+
// gzungetc call, followed by the data at the seek target location. The
1196+
// character pushed by the first gzungetc call should be discarded.
1197+
assert_eq!(unsafe { gzungetc('7' as c_int, file) }, '7' as c_int);
1198+
assert_eq!(unsafe { gzseek(file, 1, libc::SEEK_CUR) }, 2);
1199+
assert_eq!(unsafe { gzungetc('8' as c_int, file) }, '8' as c_int);
1200+
const EXPECTED2: &[u8] = b"8ip\nexample";
1201+
let mut buf = [0u8; EXPECTED2.len()];
1202+
assert_eq!(
1203+
unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) },
1204+
EXPECTED2.len() as _
1205+
);
1206+
assert_eq!(&buf, EXPECTED2);
11771207

11781208
assert_eq!(unsafe { gzclose(file) }, Z_OK);
11791209
}
@@ -1649,7 +1679,6 @@ fn gzseek_read() {
16491679
assert_eq!(unsafe { libc::close(fd) }, 0);
16501680

16511681
for file_name in [direct_file_name, gzip_file_name] {
1652-
eprintln!("opening {}", file_name);
16531682
let file = unsafe {
16541683
gzopen(
16551684
CString::new(file_name.as_str()).unwrap().as_ptr(),
@@ -1743,6 +1772,159 @@ fn gzseek_read() {
17431772
}
17441773
}
17451774

1775+
// Check that forward gzseek calls on a file opened for writing show up as runs of
// zero bytes when the file is read back, in both compressed ("w") and direct ("wT")
// modes.
#[test]
1776+
fn gzseek_write() {
1777+
// Create a temporary directory that will be automatically removed when
1778+
// temp_dir goes out of scope.
1779+
let temp_dir_path = temp_base();
1780+
let temp_dir = tempfile::TempDir::new_in(temp_dir_path).unwrap();
1781+
let temp_path = temp_dir.path();
1782+
1783+
// Test both compressed and direct (non-compressed) writes.
1784+
for mode in ["w", "wT"] {
1785+
// Open a file handle for writing.
1786+
let file_name = path(temp_path, "output");
1787+
let file = unsafe {
1788+
gzopen(
1789+
CString::new(file_name.as_str()).unwrap().as_ptr(),
1790+
CString::new(mode).unwrap().as_ptr(),
1791+
)
1792+
};
1793+
assert!(!file.is_null());
1794+
1795+
// Set a small buffer size to help exercise all the code paths.
1796+
const BUF_SIZE: c_uint = 8;
1797+
assert_eq!(unsafe { gzbuffer(file, BUF_SIZE) }, 0);
1798+
1799+
// gzseek forward a few bytes immediately.
1800+
assert_eq!(unsafe { gzseek(file, 3, libc::SEEK_SET) }, 3);
1801+
assert_eq!(unsafe { gztell(file) }, 3);
1802+
1803+
// Write some data, with gzseek calls interleaved. Note: Part of the internal seek
1804+
// implementation is done lazily in the next write call, so we use a combination
1805+
// of all the write functions: gzwrite, gzputc, gzputs, and gzflush. gzsetparams
1806+
// also implements the pending seek, but it is not supported in direct-mode
1807+
// (non-compressed) files, so it is tested separately in the function
1808+
// gzseek_gzsetparams.
1809+
const STRING1: &[u8] = b"0123";
1810+
assert_eq!(
1811+
unsafe { gzwrite(file, STRING1.as_ptr().cast::<c_void>(), STRING1.len() as _) },
1812+
STRING1.len() as _
1813+
);
1814+
assert_eq!(unsafe { gztell(file) }, 7);
1815+
assert_eq!(unsafe { gzseek(file, 1, libc::SEEK_CUR) }, 8);
1816+
assert_eq!(unsafe { gzseek(file, 12, libc::SEEK_SET) }, 12);
1817+
assert_eq!(unsafe { gztell(file) }, 12);
1818+
const STRING2: &[u8] = b"456\0";
1819+
assert_eq!(
1820+
unsafe { gzputs(file, STRING2.as_ptr().cast::<c_char>()) },
1821+
(STRING2.len() - 1) as _
1822+
);
1823+
assert_eq!(unsafe { gztell(file) }, 15);
1824+
assert_eq!(unsafe { gzseek(file, 2, libc::SEEK_CUR) }, 17);
1825+
assert_eq!(unsafe { gzputc(file, b'7' as _) }, b'7' as _);
1826+
assert_eq!(unsafe { gztell(file) }, 18);
1827+
assert_eq!(unsafe { gzseek(file, 1, libc::SEEK_CUR) }, 19);
1828+
assert_eq!(unsafe { gzflush(file, Z_SYNC_FLUSH) }, Z_OK);
1829+
assert_eq!(unsafe { gzputc(file, b'8' as _) }, b'8' as _);
1830+
1831+
// Do one more gzseek at the end, and then close the file handle. This should produce
1832+
// the specified number of zero bytes at the end of the uncompressed data stream.
// The position is 20 at this point: the seek to 19 above plus the one byte
// written by the last gzputc.
1833+
assert_eq!(
1834+
unsafe { gzseek(file, (BUF_SIZE * 3 + 1) as _, libc::SEEK_CUR) },
1835+
(20 + BUF_SIZE * 3 + 1) as _
1836+
);
1837+
assert_eq!(unsafe { gzclose(file) }, Z_OK);
1838+
1839+
// Read the file and confirm that each of the gzseek calls produced the expected
1840+
// number of zero bytes.
1841+
let file = unsafe {
1842+
gzopen(
1843+
CString::new(file_name.as_str()).unwrap().as_ptr(),
1844+
CString::new("r").unwrap().as_ptr(),
1845+
)
1846+
};
1847+
assert!(!file.is_null());
// Expected layout: 3 zeros, "0123", 5 zeros, "456", 2 zeros, "7", 1 zero, "8".
1848+
const EXPECTED1: &[u8] = b"\x00\x00\x000123\x00\x00\x00\x00\x00456\x00\x007\x008";
1849+
let mut buf = [127u8; EXPECTED1.len()];
1850+
assert_eq!(
1851+
unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) },
1852+
buf.len() as _
1853+
);
1854+
assert_eq!(&buf, EXPECTED1);
1855+
const EXPECTED2: &[u8] = &[0u8; BUF_SIZE as usize * 3 + 1];
// Read one byte more than expected to confirm that nothing follows the trailing zeros.
1856+
let mut buf = [127u8; EXPECTED2.len() + 1];
1857+
assert_eq!(
1858+
unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) },
1859+
EXPECTED2.len() as _
1860+
);
1861+
assert_eq!(&buf[..EXPECTED2.len()], EXPECTED2);
1862+
assert_eq!(unsafe { gzclose(file) }, Z_OK);
1863+
}
1864+
}
1865+
1866+
// Check that gzsetparams writes the zero bytes from a pending gzseek before it
// changes the compression parameters, so that the zeros appear between the data
// written before and after the gzsetparams call when the file is read back.
#[test]
1867+
fn gzseek_gzsetparams() {
1868+
// Create a temporary directory that will be automatically removed when
1869+
// temp_dir goes out of scope.
1870+
let temp_dir_path = temp_base();
1871+
let temp_dir = tempfile::TempDir::new_in(temp_dir_path).unwrap();
1872+
let temp_path = temp_dir.path();
1873+
1874+
// Open a file handle for writing in compressed mode.
1875+
let file_name = path(temp_path, "output.gz");
1876+
let file = unsafe {
1877+
gzopen(
1878+
CString::new(file_name.as_str()).unwrap().as_ptr(),
1879+
CString::new("w").unwrap().as_ptr(),
1880+
)
1881+
};
1882+
assert!(!file.is_null());
1883+
1884+
// Write some content to the file handle.
1885+
const STRING1: &[u8] = b"hello";
1886+
assert_eq!(unsafe { gzwrite(file, STRING1.as_ptr().cast::<c_void>(), STRING1.len() as _) }, STRING1.len() as _);
1887+
1888+
// Call gzseek to schedule a pending write of some zeros to the compressed stream.
1889+
const SEEK_AMOUNT: usize = 4;
1890+
assert_eq!(unsafe { gzseek(file, SEEK_AMOUNT as _, libc::SEEK_CUR) }, 9);
1891+
1892+
// Before doing another write, call gzsetparams. This should write the pending zeros
1893+
// to the current gzip stream before closing the stream and starting a new one.
1894+
assert_eq!(unsafe { gzsetparams(file, 9, 2) }, Z_OK);
1895+
assert_eq!(unsafe { gztell(file) }, (STRING1.len() + SEEK_AMOUNT) as _);
1896+
1897+
// Write some more content to the file handle. This will end up in the second gzip stream
1898+
// in the file.
1899+
const STRING2: &[u8] = b"world";
1900+
assert_eq!(unsafe { gzwrite(file, STRING2.as_ptr().cast::<c_void>(), STRING2.len() as _) }, STRING2.len() as _);
1901+
1902+
// Close the file handle to flush any buffered output to the file.
1903+
assert_eq!(unsafe { gzclose(file) }, Z_OK);
1904+
1905+
// Open the newly created file for reading.
1906+
let file = unsafe {
1907+
gzopen(
1908+
CString::new(file_name.as_str()).unwrap().as_ptr(),
1909+
CString::new("r").unwrap().as_ptr(),
1910+
)
1911+
};
1912+
assert!(!file.is_null());
1913+
1914+
// Read back the content to validate that it was written correctly.
1915+
let mut buf = [127u8; STRING1.len()];
1916+
assert_eq!(unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) }, buf.len() as _);
1917+
assert_eq!(&buf, STRING1);
1918+
for _ in 0..SEEK_AMOUNT {
1919+
assert_eq!(unsafe { gzgetc(file) }, 0);
1920+
}
// Read one byte more than STRING2 holds to confirm that nothing follows it.
1921+
let mut buf = [127u8; STRING2.len() + 1];
1922+
assert_eq!(unsafe { gzread(file, buf.as_mut_ptr().cast::<c_void>(), buf.len() as _) }, (buf.len() - 1) as _);
1923+
assert_eq!(&buf[..STRING2.len()], STRING2);
1924+
1925+
assert_eq!(unsafe { gzclose(file) }, Z_OK);
1926+
}
1927+
17461928
#[test]
17471929
fn gzseek_error() {
17481930
// gzseek on a null file handle should return -1.

0 commit comments

Comments
 (0)