forked from dathere/qsv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount.rs
131 lines (112 loc) · 3.91 KB
/
count.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Usage/help text for the `count` command.
// `run` passes this string to `util::get_args`, which produces the `Args`
// struct from the command line.
// NOTE(review): the layout looks like docopt syntax, in which case the option
// names here must stay in sync with the `flag_*` / `arg_*` fields of `Args` —
// confirm against `util::get_args`.
static USAGE: &str = r#"
Prints a count of the number of records in the CSV data.
Note that the count will not include the header row (unless --no-headers is
given).
For examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_count.rs.
Usage:
qsv count [options] [<input>]
qsv count --help
count options:
-H, --human-readable Comma separate row count.
--width Also return the length of the longest record.
The count and width are separated by a semicolon.
Common options:
-h, --help Display this message
-f, --flexible Do not validate if the CSV has different number of
fields per record, increasing performance when counting
without an index. Automatically enabled when --width is set.
-n, --no-headers When set, the first row will be included in
the count.
"#;
use log::info;
use serde::Deserialize;
use crate::{config::Config, util, CliResult};
// Command-line arguments for `count`, deserialized by `util::get_args(USAGE, argv)`
// in `run`.
// NOTE(review): field names presumably map onto the `<input>` argument and the
// `--…` options in USAGE by naming convention — confirm before renaming any field.
#[derive(Deserialize)]
struct Args {
// Optional input source; handed to `Config::new` as-is.
arg_input: Option<String>,
// When true, `run` prints the numbers with comma thousands separators.
flag_human_readable: bool,
// When true, `run` also reports the longest record width. It additionally
// turns quoting off, forces flexible mode, and always scans the input
// instead of consulting an index.
flag_width: bool,
// Forwarded to `Config::flexible` (OR-ed with `flag_width`).
flag_flexible: bool,
// Forwarded to `Config::no_headers`.
flag_no_headers: bool,
}
/// Entry point of the `count` command.
///
/// Parses `argv` against `USAGE`, determines the record count (and, with
/// `--width`, the longest record width) and emits the result through
/// `woutinfo!`.
///
/// The count comes from an index when one is available and `--width` was not
/// requested; otherwise the input is scanned with `count_input`.
///
/// # Errors
///
/// Propagates failures from argument parsing and from `count_input`.
pub fn run(argv: &[&str]) -> CliResult<()> {
    use thousands::Separable;

    let args: Args = util::get_args(USAGE, argv)?;
    let want_width = args.flag_width;

    let conf = Config::new(&args.arg_input)
        .no_headers(args.flag_no_headers)
        // width mode turns quoting off so that quote characters are counted too
        .quoting(!want_width)
        // width mode also tolerates records with differing field counts
        .flexible(want_width || args.flag_flexible);

    // this comment left here for Logging.md example
    // log::debug!(
    //     "input: {:?}, no_header: {}",
    //     (args.arg_input).clone().unwrap(),
    //     &args.flag_no_headers,
    // );

    // An index is only consulted when no width is needed; a failed lookup is
    // logged and treated the same as having no index.
    let index = if want_width {
        None
    } else {
        conf.indexed().unwrap_or_else(|_| {
            info!("index is stale");
            None
        })
    };

    let (count, width) = match index {
        Some(idx) => {
            info!("index used");
            (idx.count(), 0)
        },
        None => count_input(&conf, want_width)?,
    };

    // Pick the output shape from the two independent flags:
    // (human readable?, include width?)
    match (args.flag_human_readable, want_width) {
        (true, true) => {
            woutinfo!(
                "{};{}",
                count.separate_with_commas(),
                width.separate_with_commas()
            );
        },
        (true, false) => {
            woutinfo!("{}", count.separate_with_commas());
        },
        (false, true) => {
            woutinfo!("{count};{width}");
        },
        (false, false) => {
            woutinfo!("{count}");
        },
    }

    Ok(())
}
/// Counts the records produced by the reader built from `conf`, optionally
/// tracking the width of the longest record.
///
/// # Arguments
///
/// * `conf` - configuration used to open the CSV reader.
/// * `compute_width` - when `true`, also compute the longest record width.
///
/// # Returns
///
/// `(count, width)`. `width` is `0` when `compute_width` is `false` or when
/// there are no records. Otherwise it is the largest `record.as_slice().len()`
/// seen plus the number of delimiters of the *first* record (fields minus
/// one).
///
/// # Errors
///
/// Returns an error if the reader cannot be created or a record cannot be
/// read.
fn count_input(
    conf: &Config,
    compute_width: bool,
) -> Result<(u64, usize), crate::clitypes::CliError> {
    let mut rdr = conf.reader()?;
    let mut record = csv::ByteRecord::new();

    if !compute_width {
        // Plain counting: no per-record measurements needed.
        let mut count = 0_u64;
        while rdr.read_byte_record(&mut record)? {
            count += 1;
        }
        return Ok((count, 0));
    }

    // Read the first record to get the number of delimiters and the initial
    // maximum width. If there is no record at all, report zero records
    // instead of counting a record that was never read.
    if !rdr.read_byte_record(&mut record)? {
        return Ok((0, 0));
    }
    let mut count = 1_u64;
    let mut max_width = record.as_slice().len();

    // number of delimiters is number of fields minus 1,
    // because the last field doesn't have a trailing delimiter
    // NOTE(review): taken from the first record only; in flexible mode later
    // records may have a different number of fields.
    let record_numdelimiters = record.len().saturating_sub(1);

    while rdr.read_byte_record(&mut record)? {
        count += 1;
        max_width = max_width.max(record.as_slice().len());
    }

    // the delimiters are also counted when returning the width
    Ok((count, max_width + record_numdelimiters))
}