Skip to content

Commit

Permalink
Merge branch 'refs/heads/perf-improvments'
Browse files Browse the repository at this point in the history
  • Loading branch information
redindelible committed Apr 27, 2024
2 parents 11f8483 + d0a125e commit 4b360f2
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 9 deletions.
51 changes: 46 additions & 5 deletions dataframe/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ impl DataFrame {
Shape { rows: self.rows, cols: self.columns.len() }
}

/// Passes the expected row count `rows` on to every column of the frame
/// by calling each column's own `hint_rows`.
pub fn hint_rows(&mut self, rows: usize) {
    self.columns
        .iter_mut()
        .for_each(|column| column.hint_rows(rows));
}

pub fn hint_complete(&mut self) {
for col in &mut self.columns {
col.hint_complete();
Expand Down Expand Up @@ -180,6 +186,12 @@ impl<D: ColumnData> ColumnInternal for GenericColumn<D> {
}

impl<D: ColumnData> ColumnMutInternal for GenericColumn<D> {
/// Reserves room in `items` so that it can grow to `rows` entries.
///
/// Nothing is reserved when `items` already holds `rows` entries or more.
fn hint_rows(&mut self, rows: usize) {
    let current = self.items.len();
    // `checked_sub` yields `None` when `rows < current`; the filter also drops
    // the `rows == current` case so `reserve` is only reached for real growth.
    if let Some(additional) = rows.checked_sub(current).filter(|&extra| extra > 0) {
        self.items.reserve(additional);
    }
}

/// Shrinks the backing storage of `items` to fit its current contents.
fn hint_complete(&mut self) {
self.items.shrink_to_fit();
}
Expand All @@ -199,26 +211,55 @@ pub(crate) enum ColumnVariants {

impl Column for ColumnVariants {
/// Column name, obtained from the column reached through `Deref`.
fn name(&self) -> &str { self.deref().name() }
fn len(&self) -> usize { self.deref().len() }
fn len(&self) -> usize {
match self {
ColumnVariants::Integer(col) => col.len(),
ColumnVariants::Enum(col) => col.len(),
ColumnVariants::Float(col) => col.len()
}
}
/// Data type of the column, obtained from the column reached through `Deref`.
fn data_type(&self) -> DataType { self.deref().data_type() }
fn get_row_data(&self, index: usize) -> Data { self.deref().get_row_data(index) }
fn get_row_data(&self, index: usize) -> Data {
match self {
ColumnVariants::Integer(col) => col.get_row_data(index),
ColumnVariants::Enum(col) => col.get_row_data(index),
ColumnVariants::Float(col) => col.get_row_data(index)
}
}

/// Compares entries `a` and `b` (presumably row indices — confirm against the
/// `Column` trait) by delegating to the concrete column in the active variant.
fn compare(&self, a: usize, b: usize) -> Ordering {
// NOTE(review): the next line is the pre-change body that the diff view still
// shows; the `match` below is the version this commit introduces.
self.deref().compare(a, b)
match self {
ColumnVariants::Integer(col) => col.compare(a, b),
ColumnVariants::Enum(col) => col.compare(a, b),
ColumnVariants::Float(col) => col.compare(a, b)
}
}
}

impl ColumnMut for ColumnVariants {
fn set_row_data(&mut self, index: usize, data: &Data) { self.deref_mut().set_row_data(index, data) }
fn set_row_data(&mut self, index: usize, data: &Data) {
match self {
ColumnVariants::Integer(col) => col.set_row_data(index, data),
ColumnVariants::Enum(col) => col.set_row_data(index, data),
ColumnVariants::Float(col) => col.set_row_data(index, data)
}
}
}

impl ColumnInternal for ColumnVariants {
    /// Reports the underlying row count of the column reached through `Deref`.
    fn underlying_rows(&self) -> usize {
        let inner = self.deref();
        inner.underlying_rows()
    }
}

impl ColumnMutInternal for ColumnVariants {
/// Forwards the row-count hint to the column reached through `DerefMut`.
fn hint_rows(&mut self, rows: usize) { self.deref_mut().hint_rows(rows) }
/// Forwards the completion hint to the column reached through `DerefMut`.
fn hint_complete(&mut self) { self.deref_mut().hint_complete() }
fn push_data(&mut self, item: &Data) { self.deref_mut().push_data(item) }
fn push_data(&mut self, item: &Data) {
match self {
ColumnVariants::Integer(col) => col.push_data(item),
ColumnVariants::Enum(col) => col.push_data(item),
ColumnVariants::Float(col) => col.push_data(item),
}
}
}

impl Deref for ColumnVariants {
Expand Down
1 change: 1 addition & 0 deletions dataframe/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub trait ColumnMut: Column {
}

/// Crate-internal mutation hooks for columns that are both `ColumnMut` and
/// `ColumnInternal`.
pub(crate) trait ColumnMutInternal: ColumnMut + ColumnInternal {
/// Announces an expected row count. The `GenericColumn` implementation uses
/// it to reserve storage up to `rows` entries.
fn hint_rows(&mut self, rows: usize);
/// Announces that filling is finished. The `GenericColumn` implementation
/// shrinks its storage to fit in response.
fn hint_complete(&mut self);
/// Pushes `item` into the column (presumably appends one row — the
/// implementations are not visible here; confirm).
fn push_data(&mut self, item: &Data);
}
4 changes: 2 additions & 2 deletions dataframe/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ pub trait ColumnData: Copy + Eq + 'static {

/// A single value as read from or written to a column row: null, a 64-bit
/// integer, a string slice borrowed for `'a`, or a 64-bit float.
/// `Null` is the `Default` variant.
// NOTE(review): the diff view lists `Null` twice — first at its old position,
// then at the position this commit moves it to (last).
#[derive(Copy, Clone, Default, Debug)]
pub enum Data<'a> {
#[default]
Null,
Integer(i64),
Str(&'a str),
Float(f64),
#[default]
Null
}

impl<'a> Data<'a> {
Expand Down
4 changes: 4 additions & 0 deletions dataframe/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ impl<'a, C: ColumnInternal> ColumnInternal for ColumnViewMut<'a, C> {
}

impl<'a, C: ColumnMutInternal> ColumnMutInternal for ColumnViewMut<'a, C> {
/// Forwards the row-count hint to the wrapped column held in field `1`.
fn hint_rows(&mut self, rows: usize) {
self.1.hint_rows(rows);
}

/// Forwards the completion hint to the wrapped column held in field `1`.
fn hint_complete(&mut self) { self.1.hint_complete() }

fn push_data(&mut self, item: &Data) {
Expand Down
7 changes: 6 additions & 1 deletion launch_file/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,23 @@ impl LogFormat {
Ok(format)
}

pub fn read_file(&self, file: &mut impl Read, mut on_row_callback: impl FnMut(u64)) -> io::Result<DataFrame> {
pub fn read_file(&self, file: &mut impl Read, file_size: Option<u64>, mut on_row_callback: impl FnMut(u64)) -> io::Result<DataFrame> {
let mut dataframe = DataFrame::new();
dataframe.add_null_col("sensor", DataType::Enum);
dataframe.add_null_col("timestamp", DataType::Integer);

let mut variants: HashMap<u32, (String, Deserializer)> = HashMap::new();
let mut smallest = usize::MAX;
for (name, (disc, format)) in &self.variants {
let mut builder = DeserializerBuilder::new(&mut dataframe);
format.to_fast(&mut builder, name);
let fast_format = builder.finish();
smallest = smallest.min(fast_format.size).max(1);
variants.insert(*disc, (name.clone(), fast_format));
}
// Pre-size the columns from the file size, assuming every record occupies at
// least `smallest + 8` bytes (NOTE(review): the 8 presumably accounts for a
// fixed per-record header — confirm against the reader loop).
//
// `saturating_add` is required: when no variant was registered, `smallest` is
// still `usize::MAX`, and a plain `+ 8` would overflow on 64-bit targets
// (panic in debug builds, a wrapped divisor of 7 and a huge bogus reservation
// in release builds). With saturation the estimate collapses to at most one
// row, so the hint is effectively a no-op.
if let Some(file_size) = file_size {
    let min_record_bytes = (smallest as u64).saturating_add(8);
    dataframe.hint_rows((file_size / min_record_bytes) as usize);
}

let num_cols = dataframe.shape().cols;

Expand Down
2 changes: 1 addition & 1 deletion src/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ impl ImportLaunchTab {
let mut file = BufReader::new(File::open(source_path)?);
let size: u64 = file.get_ref().metadata().map_or(0, |m| m.len());

format.read_file(&mut file, |offset| {
format.read_file(&mut file, Some(size), |offset| {
progress.set(offset as f32 / size as f32);
})
}));
Expand Down

0 comments on commit 4b360f2

Please sign in to comment.