forked from dathere/qsv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount.rs
117 lines (101 loc) · 3.24 KB
/
count.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Usage/help text for the `count` subcommand. `run` passes this string, together
// with the command-line arguments, to `util::get_args` to populate `Args`.
static USAGE: &str = r#"
Prints a count of the number of records in the CSV data.
Note that the count will not include the header row (unless --no-headers is
given).
For examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_count.rs.
Usage:
qsv count [options] [<input>]
qsv count --help
count options:
-H, --human-readable Comma separate row count.
--width Also return the length of the longest record.
The count and width are separated by a semicolon.
Common options:
-h, --help Display this message
-n, --no-headers When set, the first row will be included in
the count.
"#;
use log::info;
use serde::Deserialize;
use crate::{config::Config, util, CliResult};
/// Command-line arguments of `qsv count`, deserialized from `USAGE` by `util::get_args`.
#[derive(Deserialize)]
struct Args {
    /// Optional `<input>` argument; handed to `Config::new` as-is.
    arg_input: Option<String>,
    /// `-H, --human-readable`: print the numbers with comma separators.
    flag_human_readable: bool,
    /// `--width`: also report the length of the longest record.
    flag_width: bool,
    /// `-n, --no-headers`: include the first row in the count.
    flag_no_headers: bool,
}
/// Entry point of `qsv count`: prints the record count and, with `--width`, the
/// width of the longest record after a semicolon.
///
/// Without `--width`, the count is taken from the index when `Config::indexed`
/// yields one; otherwise (and always with `--width`) the input is scanned by
/// `count_input`.
///
/// # Errors
///
/// Propagates errors from `util::get_args` and `count_input`. An error from the
/// index lookup is not propagated: it is logged and the input is scanned instead.
pub fn run(argv: &[&str]) -> CliResult<()> {
    use thousands::Separable;

    let args: Args = util::get_args(USAGE, argv)?;
    let want_width = args.flag_width;

    // When computing the width we also want to count the quotes, so quoting is
    // switched off, and differing column counts are ignored via flexible mode.
    let conf = Config::new(&args.arg_input)
        .no_headers(args.flag_no_headers)
        .quoting(!want_width)
        .flexible(want_width);

    // this comment left here for Logging.md example
    // log::debug!(
    //     "input: {:?}, no_header: {}",
    //     (args.arg_input).clone().unwrap(),
    //     &args.flag_no_headers,
    // );

    // The index is consulted only when no width is requested.
    let index = if want_width {
        None
    } else {
        conf.indexed().unwrap_or_else(|_| {
            info!("index is stale");
            None
        })
    };

    let (count, width) = match index {
        Some(idx) => {
            info!("index used");
            (idx.count(), 0)
        }
        None => count_input(&conf, want_width)?,
    };

    match (args.flag_human_readable, want_width) {
        (true, true) => {
            woutinfo!(
                "{};{}",
                count.separate_with_commas(),
                width.separate_with_commas()
            );
        }
        (true, false) => {
            woutinfo!("{}", count.separate_with_commas());
        }
        (false, true) => {
            woutinfo!("{count};{width}");
        }
        (false, false) => {
            woutinfo!("{count}");
        }
    }

    Ok(())
}
/// Counts the records produced by `conf`'s reader and, when `compute_width` is
/// set, measures the longest one.
///
/// Returns `(count, width)`. `width` is `0` when `compute_width` is `false` or
/// when no record was read. Otherwise it is the maximum, over all records, of
/// the record's field bytes plus its number of fields (one extra byte per field
/// to account for the separators, as in the original accounting).
///
/// The maximum is taken over that per-record total. Comparing field bytes alone
/// and adding the field count afterwards picks the wrong record when rows have
/// differing numbers of fields, and reports `0` for rows made only of empty
/// fields.
///
/// # Errors
///
/// Returns an error if the reader cannot be created or a record cannot be read.
fn count_input(
    conf: &Config,
    compute_width: bool,
) -> Result<(u64, usize), crate::clitypes::CliError> {
    info!("counting...");

    let mut rdr = conf.reader()?;
    let mut count = 0_u64;
    let mut max_width = 0_usize;
    let mut record = csv::ByteRecord::new();

    // Two separate loops keep the count-only path free of width bookkeeping.
    if compute_width {
        while rdr.read_byte_record(&mut record)? {
            count += 1;
            // `as_slice()` covers only the field data, so the separators are
            // added per record before comparing against the running maximum.
            let curr_width = record.as_slice().len() + record.len();
            max_width = max_width.max(curr_width);
        }
    } else {
        while rdr.read_byte_record(&mut record)? {
            count += 1;
        }
    }

    Ok((count, max_width))
}