forked from dathere/qsv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathslice.rs
122 lines (104 loc) · 3.84 KB
/
slice.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Usage/help text for the `slice` command. `run` hands this string to
// `util::get_args`, which parses the command line against it to fill in `Args`.
// The text is read at runtime, so edits here change the command's CLI surface.
static USAGE: &str = r#"
Returns the rows in the range specified (starting at 0, half-open interval).
The range does not include headers.
If the start of the range isn't specified, then the slice starts from the first
record in the CSV data.
If the end of the range isn't specified, then the slice continues to the last
record in the CSV data.
This operation can be made much faster by creating an index with 'qsv index'
first. Namely, a slice on an index requires parsing just the rows that are
sliced. Without an index, all rows up to the first row in the slice must be
parsed.
Usage:
qsv slice [options] [<input>]
qsv slice --help
slice options:
-s, --start <arg> The index of the record to slice from.
If negative, starts from the last record.
-e, --end <arg> The index of the record to slice to.
-l, --len <arg> The length of the slice (can be used instead
of --end).
-i, --index <arg> Slice a single record (shortcut for -s N -l 1).
Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
-n, --no-headers When set, the first row will not be interpreted
as headers. Otherwise, the first row will always
appear in the output as the header row.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
"#;
use std::fs;
use serde::Deserialize;
use crate::{
config::{Config, Delimiter},
index::Indexed,
util, CliResult,
};
/// Command-line arguments for `qsv slice`, deserialized from `USAGE` by
/// `util::get_args` in `run`.
///
/// NOTE(review): the `arg_`/`flag_` prefixes presumably map each field to the
/// `<input>` positional and the matching `--flag` in `USAGE` — confirm against
/// `util::get_args`.
#[derive(Deserialize)]
struct Args {
/// Input path handed to `Config::new` when building the reader config.
arg_input: Option<String>,
/// Requested start record. Signed: a negative value is resolved against the
/// total row count in `Args::range`.
flag_start: Option<isize>,
/// Requested end record; forwarded unchanged to `util::range`.
flag_end: Option<usize>,
/// Requested slice length; forwarded unchanged to `util::range`.
flag_len: Option<usize>,
/// Single-record selector; forwarded unchanged to `util::range`.
flag_index: Option<usize>,
/// Output path handed to `Config::new` when building the writer config.
flag_output: Option<String>,
/// Forwarded to `Config::no_headers` on the reader config.
flag_no_headers: bool,
/// Forwarded to `Config::delimiter` on the reader config.
flag_delimiter: Option<Delimiter>,
}
/// Entry point for the `slice` command.
///
/// Parses `argv` against `USAGE`, then asks the reader configuration whether
/// an index is available for the input. If one is, the slice is served through
/// `Args::with_index`; otherwise it falls back to `Args::no_index`.
///
/// # Errors
///
/// Propagates any error from argument parsing, from the index lookup, or from
/// whichever slicing routine is chosen.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let parsed: Args = util::get_args(USAGE, argv)?;
    let reader_config = parsed.rconfig().checkutf8(false);

    if let Some(indexed_input) = reader_config.indexed()? {
        parsed.with_index(indexed_input)
    } else {
        parsed.no_index()
    }
}
impl Args {
    /// Slices the input by reading it sequentially (used when `run` finds no index).
    ///
    /// Writes the header row via `Config::write_headers`, then skips `start`
    /// records and copies at most `end - start` records to the output.
    ///
    /// # Errors
    ///
    /// Propagates reader/writer construction errors, range validation errors,
    /// CSV parse errors from the input, and write/flush errors from the output.
    fn no_index(&self) -> CliResult<()> {
        let mut rdr = self.rconfig().reader()?;
        let mut wtr = self.wconfig().writer()?;
        self.rconfig().write_headers(&mut rdr, &mut wtr)?;

        let (start, end) = self.range()?;
        // `saturating_sub` keeps a reversed range from underflowing; it is a
        // no-op whenever `util::range` already guarantees `start <= end`.
        let wanted = end.saturating_sub(start);
        for r in rdr.byte_records().skip(start).take(wanted) {
            wtr.write_byte_record(&r?)?;
        }
        Ok(wtr.flush()?)
    }

    /// Slices the input using an index, seeking straight to the first wanted record.
    ///
    /// Writes the header row, then (for a non-empty range) seeks the index to
    /// `start` and copies at most `end - start` records to the output.
    ///
    /// # Errors
    ///
    /// Propagates writer construction errors, range validation errors, seek
    /// errors, CSV parse errors, and write/flush errors.
    fn with_index(&self, mut idx: Indexed<fs::File, fs::File>) -> CliResult<()> {
        let mut wtr = self.wconfig().writer()?;
        self.rconfig().write_headers(&mut *idx, &mut wtr)?;

        let (start, end) = self.range()?;
        let wanted = end.saturating_sub(start);
        // An empty range skips the seek entirely, as the original early return did
        // (presumably because seeking past the last record can fail — confirm).
        if wanted > 0 {
            idx.seek(start as u64)?;
            for r in idx.byte_records().take(wanted) {
                wtr.write_byte_record(&r?)?;
            }
        }
        // Flush on every path, including the empty-range one, so a failure to
        // write the header row is reported instead of being dropped silently
        // when the writer goes out of scope.
        Ok(wtr.flush()?)
    }

    /// Resolves the `--start`/`--end`/`--len`/`--index` flags into a
    /// `(start, end)` pair by delegating to `util::range`.
    ///
    /// A negative `--start` counts back from the end of the data: the total
    /// row count is obtained with `util::count_rows` and the offset is
    /// subtracted from it, clamping at 0 when the offset exceeds the row count.
    ///
    /// # Errors
    ///
    /// Returns the message from `util::range` for invalid flag combinations,
    /// or the stringified row-count error when the rows cannot be counted.
    fn range(&self) -> Result<(usize, usize), String> {
        let start = match self.flag_start {
            Some(offset) if offset < 0 => {
                // Counting rows reads user-supplied input, so surface a failure
                // as an error rather than panicking.
                let total = util::count_rows(&self.rconfig()).map_err(|e| e.to_string())? as usize;
                // Clamp to the first record: asking for more trailing rows than
                // exist should yield the whole file, not an offset into it.
                Some(total.saturating_sub(offset.unsigned_abs()))
            }
            // Non-negative, so the cast to `usize` is lossless.
            Some(offset) => Some(offset as usize),
            None => None,
        };
        util::range(start, self.flag_end, self.flag_len, self.flag_index)
    }

    /// Builds the reader configuration from the input path, delimiter and
    /// header flags, with UTF-8 checking switched off.
    fn rconfig(&self) -> Config {
        Config::new(&self.arg_input)
            .checkutf8(false)
            .delimiter(self.flag_delimiter)
            .no_headers(self.flag_no_headers)
    }

    /// Builds the writer configuration from the `--output` path.
    fn wconfig(&self) -> Config {
        Config::new(&self.flag_output)
    }
}