Skip to content

Commit

Permalink
Merge pull request #3 from mintyplanet/transpose
Browse files Browse the repository at this point in the history
merge Transpose PR
  • Loading branch information
jqnatividad authored Dec 27, 2020
2 parents 33bd6bd + 9c4bf45 commit 3fd2edd
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/cmd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ pub mod sort;
pub mod split;
pub mod stats;
pub mod table;
pub mod transpose;
98 changes: 98 additions & 0 deletions src/cmd/transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use config::{Config, Delimiter};
use csv::ByteRecord;
use std::str;
use util;
use CliResult;

// Usage/help text for the `transpose` subcommand. `run` hands this string to
// `util::get_args` together with the raw argv to populate `Args`.
// NOTE(review): the option lines below carry no leading indentation or
// column alignment — confirm the layout still satisfies the usage parser.
static USAGE: &'static str = "
Transpose the rows/columns of CSV data.
Note that by default this reads all of the CSV data into memory,
unless --multipass is given.
Usage:
xsv transpose [options] [<input>]
transpose options:
-m, --multipass Process the transpose by making multiple
passes over the dataset. Useful for really
big datasets. Consumes memory relative to
the number of rows.
Note that in general it is faster to
process the transpose in memory.
Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
";

/// Parsed command-line arguments for `transpose`, produced in `run` by
/// `util::get_args(USAGE, argv)`.
#[derive(Deserialize)]
struct Args {
/// Input path given as `<input>`; `run` treats `None` and `"-"` as stdin.
arg_input: Option<String>,
/// Value of `--output`; passed to `Config::new` when building the writer.
flag_output: Option<String>,
/// Value of `--delimiter`; applied to the reader configuration only.
flag_delimiter: Option<Delimiter>,
/// Set by `--multipass`; only honoured by `run` when the input is not stdin.
flag_multipass: bool,
}

/// Entry point for the `transpose` subcommand.
///
/// Parses `argv` against `USAGE`, then chooses a strategy: the multipass
/// transpose runs only when `--multipass` was requested and the input is a
/// named file. In every other case, including when the input is stdin, the
/// whole input is transposed in memory.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;

    // No input argument, or a literal "-", both count as stdin.
    let reads_stdin = args.arg_input.as_ref().map_or(true, |path| path == "-");

    if reads_stdin || !args.flag_multipass {
        args.in_memory_transpose()
    } else {
        args.multipass_transpose()
    }
}

impl Args {
    /// Transposes the input by buffering every record in memory.
    ///
    /// The length of the first record fixes the number of input columns,
    /// which is also the number of output records. Output record `col` is
    /// built from field `col` of each buffered input record, in input order.
    fn in_memory_transpose(&self) -> CliResult<()> {
        let mut rdr = self.rconfig().reader()?;
        let mut wtr = self.wconfig().writer()?;
        // Number of fields in the first record, i.e. the input column count.
        let ncols = rdr.byte_headers()?.len();

        // Any read error aborts here, before a single field is indexed.
        let rows = rdr.byte_records().collect::<Result<Vec<_>, _>>()?;
        for col in 0..ncols {
            let mut out = ByteRecord::new();
            for row in &rows {
                out.push_field(&row[col]);
            }
            wtr.write_byte_record(&out)?;
        }
        wtr.flush()?;
        Ok(())
    }

    /// Transposes the input by re-reading it once per input column.
    ///
    /// Only one output record is held at a time, so memory grows with the
    /// number of input rows rather than with the whole dataset. A fresh
    /// reader is opened for the initial column count and again for each pass.
    fn multipass_transpose(&self) -> CliResult<()> {
        let mut wtr = self.wconfig().writer()?;
        // A throwaway reader is used just to learn the input column count.
        let ncols = self.rconfig().reader()?.byte_headers()?.len();

        for col in 0..ncols {
            let mut rdr = self.rconfig().reader()?;

            let mut out = ByteRecord::new();
            for result in rdr.byte_records() {
                let row = result?;
                out.push_field(&row[col]);
            }
            wtr.write_byte_record(&out)?;
        }
        wtr.flush()?;
        Ok(())
    }

    /// Builds the writer configuration from the `--output` value.
    fn wconfig(&self) -> Config {
        let target = &self.flag_output;
        Config::new(target)
    }

    /// Builds the reader configuration from the input path and delimiter.
    ///
    /// `no_headers(true)` is always set; presumably this makes the first row
    /// ordinary data so it is transposed like every other row.
    fn rconfig(&self) -> Config {
        let base = Config::new(&self.arg_input);
        base.delimiter(self.flag_delimiter).no_headers(true)
    }
}
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ macro_rules! command_list {
split Split CSV data into many files
stats Compute basic statistics
table Align CSV data into columns
transpose Transpose rows/columns of CSV data
"
)
}
Expand Down Expand Up @@ -161,6 +162,7 @@ enum Command {
Split,
Stats,
Table,
Transpose,
}

impl Command {
Expand Down Expand Up @@ -196,6 +198,7 @@ impl Command {
Command::Split => cmd::split::run(argv),
Command::Stats => cmd::stats::run(argv),
Command::Table => cmd::table::run(argv),
Command::Transpose => cmd::transpose::run(argv),
}
}
}
Expand Down
44 changes: 44 additions & 0 deletions tests/test_transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use workdir::Workdir;

use {CsvData, qcheck};

/// Property shared by both transpose tests.
///
/// Writes `rows` to `in.csv` inside a working directory called `name`, runs
/// `transpose` on it (adding `--multipass` when `streaming` is true), and
/// checks that stdout equals the matrix transpose of `rows`.
fn prop_transpose(name: &str, rows: CsvData, streaming: bool) -> bool {
    let wrk = Workdir::new(name);
    wrk.create("in.csv", rows.clone());

    let mut cmd = wrk.command("transpose");
    cmd.arg("in.csv");
    if streaming {
        cmd.arg("--multipass");
    }

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // Dimensions of the input; an empty input has zero columns.
    let nrows = rows.len();
    let ncols = if rows.is_empty() { 0 } else { rows[0].len() };

    // Expected output row `col` is input column `col`, read top to bottom.
    let expected: Vec<Vec<String>> = (0..ncols)
        .map(|col| (0..nrows).map(|row| rows[row][col].to_owned()).collect())
        .collect();

    rassert_eq!(got, expected)
}

/// Checks the transpose property without `--multipass` (in-memory path).
#[test]
fn prop_transpose_in_memory() {
    fn check(rows: CsvData) -> bool {
        prop_transpose("prop_transpose_in_memory", rows, false)
    }
    // Coerce the fn item to a plain fn pointer before passing it to `qcheck`.
    let property: fn(CsvData) -> bool = check;
    qcheck(property);
}

/// Checks the transpose property with `--multipass` on a file input.
#[test]
fn prop_transpose_multipass() {
    fn check(rows: CsvData) -> bool {
        prop_transpose("prop_transpose_multipass", rows, true)
    }
    // Coerce the fn item to a plain fn pointer before passing it to `qcheck`.
    let property: fn(CsvData) -> bool = check;
    qcheck(property);
}
1 change: 1 addition & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ mod test_sort;
mod test_split;
mod test_stats;
mod test_table;
mod test_transpose;

fn qcheck<T: Testable>(p: T) {
QuickCheck::new().gen(StdGen::new(thread_rng(), 5)).quickcheck(p);
Expand Down

0 comments on commit 3fd2edd

Please sign in to comment.