-
Notifications
You must be signed in to change notification settings - Fork 70
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from mintyplanet/transpose
merge Transpose PR
- Loading branch information
Showing
5 changed files
with
147 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ pub mod sort; | |
pub mod split; | ||
pub mod stats; | ||
pub mod table; | ||
pub mod transpose; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
use config::{Config, Delimiter}; | ||
use csv::ByteRecord; | ||
use std::str; | ||
use util; | ||
use CliResult; | ||
|
||
// Usage/help text for the `transpose` subcommand. `run` hands this string to
// `util::get_args` together with argv to populate `Args`, so the option names
// below must stay in sync with the `flag_*` / `arg_*` fields of `Args`.
static USAGE: &'static str = " | ||
Transpose the rows/columns of CSV data. | ||
Note that by default this reads all of the CSV data into memory, | ||
unless --multipass is given. | ||
Usage: | ||
xsv transpose [options] [<input>] | ||
transpose options: | ||
-m, --multipass Process the transpose by making multiple | ||
passes over the dataset. Useful for really | ||
big datasets. Consumes memory relative to | ||
the number of rows. | ||
Note that in general it is faster to | ||
process the transpose in memory. | ||
Common options: | ||
-h, --help Display this message | ||
-o, --output <file> Write output to <file> instead of stdout. | ||
-d, --delimiter <arg> The field delimiter for reading CSV data. | ||
Must be a single character. (default: ,) | ||
"; | ||
|
||
/// Parsed command-line arguments for `transpose`, filled in by
/// `util::get_args(USAGE, argv)` in `run`.
#[derive(Deserialize)] | ||
struct Args { | ||
/// Input path. `run` treats both `None` and `"-"` as reading from stdin.
arg_input: Option<String>, | ||
/// Output path handed to the writer config (`--output`).
flag_output: Option<String>, | ||
/// Field delimiter handed to the reader config (`--delimiter`).
flag_delimiter: Option<Delimiter>, | ||
/// Requests the multipass strategy (`--multipass`); `run` only honours it
/// when the input is not stdin.
flag_multipass: bool, | ||
} | ||
|
||
pub fn run(argv: &[&str]) -> CliResult<()> { | ||
let args: Args = util::get_args(USAGE, argv)?; | ||
|
||
let input_is_stdin = match args.arg_input { | ||
Some(ref s) if s == "-" => true, | ||
None => true, | ||
_ => false, | ||
}; | ||
|
||
if args.flag_multipass && !input_is_stdin { | ||
args.multipass_transpose() | ||
} else { | ||
args.in_memory_transpose() | ||
} | ||
} | ||
|
||
impl Args { | ||
fn in_memory_transpose(&self) -> CliResult<()> { | ||
let mut rdr = self.rconfig().reader()?; | ||
let mut wtr = self.wconfig().writer()?; | ||
let nrows = rdr.byte_headers()?.len(); | ||
|
||
let all = rdr.byte_records().collect::<Result<Vec<_>, _>>()?; | ||
for i in 0..nrows { | ||
let mut record = ByteRecord::new(); | ||
|
||
for row in all.iter() { | ||
record.push_field(&row[i]); | ||
} | ||
wtr.write_byte_record(&record)?; | ||
} | ||
Ok(wtr.flush()?) | ||
} | ||
|
||
fn multipass_transpose(&self) -> CliResult<()> { | ||
let mut wtr = self.wconfig().writer()?; | ||
let nrows = self.rconfig().reader()?.byte_headers()?.len(); | ||
|
||
for i in 0..nrows { | ||
let mut rdr = self.rconfig().reader()?; | ||
|
||
let mut record = ByteRecord::new(); | ||
for row in rdr.byte_records() { | ||
record.push_field(&row?[i]); | ||
} | ||
wtr.write_byte_record(&record)?; | ||
} | ||
Ok(wtr.flush()?) | ||
} | ||
|
||
fn wconfig(&self) -> Config { | ||
Config::new(&self.flag_output) | ||
} | ||
|
||
fn rconfig(&self) -> Config { | ||
Config::new(&self.arg_input) | ||
.delimiter(self.flag_delimiter) | ||
.no_headers(true) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
use workdir::Workdir; | ||
|
||
use {CsvData, qcheck}; | ||
|
||
fn prop_transpose(name: &str, rows: CsvData, streaming: bool) -> bool { | ||
let wrk = Workdir::new(name); | ||
wrk.create("in.csv", rows.clone()); | ||
|
||
let mut cmd = wrk.command("transpose"); | ||
cmd.arg("in.csv"); | ||
if streaming { cmd.arg("--multipass"); } | ||
|
||
let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd); | ||
|
||
let mut expected = vec![]; | ||
|
||
let nrows = rows.len(); | ||
let ncols = if !rows.is_empty() {rows[0].len() } else {0}; | ||
|
||
for i in 0..ncols { | ||
let mut expected_row = vec![]; | ||
for j in 0..nrows { | ||
expected_row.push(rows[j][i].to_owned()); | ||
} | ||
expected.push(expected_row); | ||
} | ||
rassert_eq!(got, expected) | ||
} | ||
|
||
/// Quickcheck property: the default transpose (no `--multipass`) matches the
/// expected transpose for generated CSV data.
#[test]
fn prop_transpose_in_memory() {
    fn check(rows: CsvData) -> bool {
        let streaming = false;
        prop_transpose("prop_transpose_in_memory", rows, streaming)
    }
    qcheck(check as fn(CsvData) -> bool);
}
|
||
/// Quickcheck property: the `--multipass` transpose matches the expected
/// transpose for generated CSV data.
#[test]
fn prop_transpose_multipass() {
    fn check(rows: CsvData) -> bool {
        let streaming = true;
        prop_transpose("prop_transpose_multipass", rows, streaming)
    }
    qcheck(check as fn(CsvData) -> bool);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters