forked from dathere/qsv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate.rs
143 lines (115 loc) · 4.5 KB
/
generate.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Help/usage text for the `generate` subcommand. `run` passes this string to
// `util::get_args` together with argv to populate `Args`, so the option names
// spelled here (`--rows`, `--outdsp`, `--indsp`, `--output`, `--delimiter`,
// `<input>`) correspond to the `flag_*` / `arg_*` fields of `Args`.
// This text is read at runtime: editing it changes the command-line interface.
// NOTE(review): the leading indentation and column alignment inside this
// literal look collapsed (single spaces between option specs and their
// descriptions) - confirm against the upstream file before relying on how
// the usage parser splits options from descriptions.
static USAGE: &str = r#"
Generates test data by profiling a CSV using a Markov decision process
machine learning algorithm.
Also allows you to create and use Data Sample Parser (DSP) profiles
to generate test data without access to the original profiled CSV.
See https://docs.rs/test-data-generation/ for more info.
Examples:
Generate 100 rows of test data based on prod-data.csv and
save it in testdata.csv.
$ qsv generate --rows 100 prod-data.csv > testdata.csv
Generate 100 rows based on prod-data.csv and save it to a
file named testdata.csv. Also create a DSP profile named
prod-profile (which is saved as prod-profile.json in the file system)
$ qsv generate -r 100 prod-data.csv --outdsp prod-profile --output testdata.csv
Generate 100 rows based on an existing DSP profile (prod-profile.json)
and save it to testdata.csv
$ qsv generate -r 100 --indsp prod-profile > testdata.csv
Create a DSP profile (prod-profile.json) based on prod-data.csv.
$ qsv generate prod-data.csv --outdsp prod-profile
Usage:
qsv generate [options] [--rows=<count>] <input>
qsv generate [options] [--rows=<count>] (--indsp=<file>)
qsv generate [options] (--outdsp=<file>) [<input>]
qsv generate --help
generate options:
-r, --rows=<count> Number of rows of test data to generate.
[default: 0]
--outdsp <file> Create a Data Sample Parser (DSP) JSON file
based on the <input> file.
.json file extension automatically added.
--indsp <file> Use a DSP JSON file to generate test data.
.json file extension assumed.
Common options:
-h, --help Display this message
-o, --output <file> Write generated output to <file>.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
"#;
use std::{
env::temp_dir,
fs,
io::{self, Write},
};
use serde::Deserialize;
use test_data_generation::data_sample_parser::DataSampleParser;
use uuid::Uuid;
use crate::{
config::{Config, Delimiter},
util, CliResult,
};
/// Command-line arguments of `qsv generate`, deserialized from the parsed
/// `USAGE` text by `util::get_args`.
///
/// Field names are part of the deserialization contract (`arg_*` for
/// positionals, `flag_*` for options) and must keep matching `USAGE`.
#[derive(Deserialize)]
struct Args {
    // --- what to profile / how to read it ---
    /// `<input>`: the CSV to profile; absent when only `--indsp` is used.
    arg_input: Option<String>,
    /// `--delimiter`: field delimiter used when reading CSV data.
    flag_delimiter: Option<Delimiter>,
    /// `--indsp`: existing DSP JSON profile to generate test data from.
    flag_indsp: Option<String>,

    // --- what to produce ---
    /// `--rows`: number of rows of test data to generate (USAGE default: 0).
    flag_rows: u32,
    /// `--output`: file to write the generated test data to.
    flag_output: Option<String>,
    /// `--outdsp`: name of the DSP JSON profile to create from `<input>`.
    flag_outdsp: Option<String>,
}
/// Entry point of the `generate` subcommand.
///
/// Workflow:
/// 1. Obtain a `DataSampleParser` (DSP), either loaded from the `--indsp`
///    profile or built by profiling the input CSV.
/// 2. When profiling, optionally persist the profile (`--outdsp`).
/// 3. If `--rows` is greater than zero, generate that many rows of test data
///    to `--output`, or to stdout when no output file was given.
///
/// # Errors
///
/// Returns an error when the arguments cannot be parsed, when the input CSV
/// cannot be read or copied, when profiling or test data generation fails,
/// when a temporary file path is not valid UTF-8, or when any file/stdout
/// I/O (including removal of the temporary files) fails.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;
    let conf = Config::new(&args.arg_input).delimiter(args.flag_delimiter);
    let tdir = temp_dir();

    let mut dsp = if let Some(indsp) = args.flag_indsp {
        // use an existing DSP JSON file, no need to read input CSV
        DataSampleParser::from_file(&indsp)
    } else {
        // create a DSP profile from the input CSV
        let in_fpath = tdir.join(format!("{}.csv", Uuid::new_v4()));
        // The DSP API is handed the path as a `String`; fail clearly
        // instead of silently falling back to an empty path.
        let analyze_csv_path = in_fpath
            .to_str()
            .ok_or_else(|| {
                format!(
                    "Temporary file path is not valid UTF-8: {}",
                    in_fpath.display()
                )
            })?
            .to_owned();

        let mut profiler = DataSampleParser::new();

        // Everything that touches the temporary copy runs inside this closure
        // so that the file is removed afterwards no matter which step failed.
        let profiled = (|| -> CliResult<()> {
            let mut rdr = conf.reader()?;
            let headers = rdr.byte_headers()?;

            // The input is re-written through a default `csv::Writer`, so the
            // analyzer reads the copy with its default delimiter (`None`).
            let mut wtr = csv::Writer::from_path(&in_fpath)?;
            wtr.write_byte_record(headers)?;

            let mut record = csv::ByteRecord::new();
            while rdr.read_byte_record(&mut record)? {
                wtr.write_byte_record(&record)?;
            }
            wtr.flush()?;
            // Close the copy before it is analyzed and later deleted.
            drop(wtr);

            profiler
                .analyze_csv_file(&analyze_csv_path, None)
                .map_err(|e| format!("Cannot profile input CSV: {}", e))?;
            Ok(())
        })();

        // Remove the temporary copy, but report a profiling failure first:
        // it is the more useful error (and the copy may not even exist then).
        let cleanup = fs::remove_file(&in_fpath);
        profiled?;
        cleanup?;

        // --outdsp option invoked. Save DSP JSON file that we
        // can use later with --indsp option to generate test data
        // without expensive test data profiling
        if let Some(outdsp) = args.flag_outdsp {
            profiler.save(&outdsp)?;
        }

        profiler
    };

    // Copied into locals so the closure below does not need to borrow `args`,
    // which is partially moved by the `match` on `flag_output`.
    let rows = args.flag_rows;
    if rows > 0 {
        let delimiter = Some(conf.get_delimiter());

        match args.flag_output {
            Some(path) => {
                dsp.generate_csv(rows, &path, delimiter)
                    .map_err(|e| format!("Cannot generate test data: {}", e))?;
            }
            None => {
                // The generator writes to a file path, so stage the output in
                // a temporary file and then stream it to stdout.
                let tmp_fpath = tdir.join(format!("{}.csv", Uuid::new_v4()));
                let testdata_out = tmp_fpath
                    .to_str()
                    .ok_or_else(|| {
                        format!(
                            "Temporary file path is not valid UTF-8: {}",
                            tmp_fpath.display()
                        )
                    })?
                    .to_owned();

                let emitted = (|| -> CliResult<()> {
                    dsp.generate_csv(rows, &testdata_out, delimiter)
                        .map_err(|e| format!("Cannot generate test data: {}", e))?;
                    let testdata = fs::read(&tmp_fpath)?;
                    let stdout = io::stdout();
                    let mut out = stdout.lock();
                    out.write_all(&testdata)?;
                    out.flush()?;
                    Ok(())
                })();

                // Clean up the staging file on success and on failure alike.
                let cleanup = fs::remove_file(&tmp_fpath);
                emitted?;
                cleanup?;
            }
        }
    }

    Ok(())
}