Skip to content

Commit

Permalink
move transform fn to util as a general helper fn
Browse files Browse the repository at this point in the history
- as it's also used by sort for case-insensitive compare
  • Loading branch information
jqnatividad committed Dec 19, 2022
1 parent 5613f65 commit 8acaa58
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 23 deletions.
31 changes: 8 additions & 23 deletions src/cmd/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ use crate::{
config::{Config, Delimiter, SeekRead},
index::Indexed,
select::{SelectColumns, Selection},
util, CliResult,
util,
util::ByteString,
CliResult,
};

pub type ByteString = Vec<u8>;

#[derive(Deserialize)]
struct Args {
arg_columns1: SelectColumns,
Expand Down Expand Up @@ -408,7 +408,10 @@ impl<R: io::Read + io::Seek> ValueIndex<R> {
// indexes in one pass.
row_idx.write_u64::<BigEndian>(row.position().unwrap().byte())?;

let fields: Vec<_> = sel.select(&row).map(|v| transform(v, casei)).collect();
let fields: Vec<_> = sel
.select(&row)
.map(|v| util::transform(v, casei))
.collect();
if nulls || !fields.iter().any(std::vec::Vec::is_empty) {
match val_idx.entry(fields) {
Entry::Vacant(v) => {
Expand Down Expand Up @@ -454,23 +457,5 @@ impl<R> fmt::Debug for ValueIndex<R> {

#[inline]
fn get_row_key(sel: &Selection, row: &csv::ByteRecord, casei: bool) -> Vec<ByteString> {
sel.select(row).map(|v| transform(v, casei)).collect()
}

#[inline]
pub fn transform(bs: &[u8], casei: bool) -> ByteString {
if let Ok(s) = str::from_utf8(bs) {
if casei {
let norm: String = s
.trim()
.chars()
.map(|c| c.to_lowercase().next().unwrap())
.collect();
norm.into_bytes()
} else {
s.trim().as_bytes().to_vec()
}
} else {
bs.to_vec()
}
sel.select(row).map(|v| util::transform(v, casei)).collect()
}
20 changes: 20 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -892,3 +892,23 @@ impl ColumnNameParser {
name
}
}

/// Owned byte buffer returned by [`transform`].
pub type ByteString = Vec<u8>;

/// Normalizes the raw bytes of a field into an owned [`ByteString`].
///
/// * If `bs` is valid UTF-8, surrounding whitespace is trimmed. When `casei`
///   is `true`, every character is additionally replaced by the first
///   character of its lowercase mapping.
/// * If `bs` is not valid UTF-8, it is copied verbatim: no trimming and no
///   case conversion are applied.
#[inline]
pub fn transform(bs: &[u8], casei: bool) -> ByteString {
    // Undecodable input is passed through untouched.
    let trimmed = match str::from_utf8(bs) {
        Ok(text) => text.trim(),
        Err(_) => return bs.to_vec(),
    };

    if !casei {
        return trimmed.as_bytes().to_vec();
    }

    let mut lowered = String::with_capacity(trimmed.len());
    // `char::to_lowercase` always yields at least one char; only that first
    // char is kept, so multi-char lowercase expansions are truncated.
    lowered.extend(trimmed.chars().filter_map(|ch| ch.to_lowercase().next()));
    lowered.into_bytes()
}

0 comments on commit 8acaa58

Please sign in to comment.