Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Histogram diff #289

Merged
merged 27 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
speed up file loading
  • Loading branch information
gcanat committed Oct 2, 2024
commit d28b97800e54f5b2fc7e7662f307c302a76c5b92
2 changes: 1 addition & 1 deletion text/diff_util/dir_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub struct DirData {
impl DirData {
pub fn load(path: PathBuf) -> io::Result<DirData> {
let mut dir_data = DirData {
path: path,
path,
files: Default::default(),
};
let entries = fs::read_dir(&dir_data.path)?;
Expand Down
81 changes: 44 additions & 37 deletions text/diff_util/file_data.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,45 @@
use std::{
fs::File,
io::{self, Read},
io,
mem::take,
path::PathBuf,
str::from_utf8,
time::SystemTime,
};

use super::constants::COULD_NOT_UNWRAP_FILENAME;
use plib::BUFSZ;

#[derive(Debug)]
pub struct FileData {
pub struct FileData<'a> {
path: PathBuf,
lines: Vec<String>,
lines: Vec<&'a str>,
modified: SystemTime,
ends_with_newline: bool,
}

impl FileData {
impl<'a> FileData<'a> {
pub fn ends_with_newline(&self) -> bool {
self.ends_with_newline
}

pub fn get_file(path: PathBuf) -> io::Result<Self> {
let mut file = File::open(path.clone())?;
pub fn get_file(path: PathBuf, lines: Vec<&'a str>) -> io::Result<Self> {
let file = File::open(&path)?;
let modified = file.metadata()?.modified()?;
let mut buffer = [0_u8; BUFSZ];
// let mut read_length: usize = 0;
let mut content = String::new();

loop {
let n = file.read(&mut buffer).expect("Couldn't read file");
if n == 0 {
break;
}
let string_slice =
std::str::from_utf8(&buffer[..n]).expect("Couldn't convert to string");
content.push_str(string_slice);
}
let mut lines = content
.split("\n")
.map(|line| line.to_string())
.collect::<Vec<String>>();

let ends_with_newline = content.ends_with('\n');

if ends_with_newline {
lines.push(String::from(""));
}

let result = Self {
Ok(Self {
path,
lines,
modified,
ends_with_newline,
};

Ok(result)
// FIXME: properly detect if file ends with newline
ends_with_newline: false,
})
}

pub fn lines(&self) -> &Vec<String> {
pub fn lines(&self) -> &Vec<&str> {
&self.lines
}

pub fn line(&self, index: usize) -> &String {
pub fn line(&self, index: usize) -> &str {
&self.lines[index]
}

Expand All @@ -84,3 +61,33 @@ impl FileData {
self.path.to_str().unwrap_or(&COULD_NOT_UNWRAP_FILENAME)
}
}

/// Iterator that walks a byte buffer and hands out one borrowed line at a time.
///
/// Lines are separated by `\n`. The separator is never part of the yielded
/// text, and a single `\r` directly in front of the `\n` is dropped as well.
/// Bytes after the last `\n` (if any) are yielded unchanged as the final item.
pub struct LineReader<'a> {
    /// Bytes not yet yielded; advanced past each line as it is consumed.
    pub content: &'a [u8],
}

impl<'a> Iterator for LineReader<'a> {
    type Item = &'a str;

    /// Returns the next line, or `None` once the buffer is exhausted.
    ///
    /// # Panics
    ///
    /// Panics if the bytes of the line being returned are not valid UTF-8.
    fn next(&mut self) -> Option<Self::Item> {
        // Detach the unread bytes; `self.content` is empty until we put the tail back.
        let remaining = take(&mut self.content);
        if remaining.is_empty() {
            return None;
        }

        let raw = match remaining.iter().position(|&byte| byte == b'\n') {
            Some(newline_at) => {
                // Everything after the newline is kept for the following call.
                self.content = &remaining[newline_at + 1..];
                let body = &remaining[..newline_at];
                // Drop exactly one trailing `\r` so CRLF and LF lines look alike.
                match body {
                    [head @ .., b'\r'] => head,
                    _ => body,
                }
            }
            // No terminator left: the whole remainder is the last line.
            None => remaining,
        };

        Some(from_utf8(raw).expect("Failed to convert to str"))
    }
}
35 changes: 23 additions & 12 deletions text/diff_util/file_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::{
common::{FormatOptions, OutputFormat},
constants::COULD_NOT_UNWRAP_FILENAME,
diff_exit_status::DiffExitStatus,
file_data::FileData,
file_data::{FileData, LineReader},
functions::{check_existance, is_binary, system_time_to_rfc2822},
hunks::Hunks,
};
Expand All @@ -14,16 +14,16 @@ use crate::diff_util::{

use std::{
collections::HashMap,
fs::File,
fs::{File, read_to_string},
io::{self, BufReader, Read},
os::unix::fs::MetadataExt,
path::PathBuf,
};

#[derive(Debug)]
pub struct FileDiff<'a> {
file1: &'a mut FileData,
file2: &'a mut FileData,
file1: &'a mut FileData<'a>,
file2: &'a mut FileData<'a>,
hunks: Hunks,
format_options: &'a FormatOptions,
are_different: bool,
Expand All @@ -35,8 +35,8 @@ impl<'a> FileDiff<'a> {
}

fn new(
file1: &'a mut FileData,
file2: &'a mut FileData,
file1: &'a mut FileData<'a>,
file2: &'a mut FileData<'a>,
format_options: &'a FormatOptions,
) -> Self {
if format_options.label1.is_none() && format_options.label2.is_some() {
Expand All @@ -61,8 +61,19 @@ impl<'a> FileDiff<'a> {
if is_binary(&path1)? || is_binary(&path2)? {
return Self::binary_file_diff(&path1, &path2);
} else {
let mut file1 = FileData::get_file(path1)?;
let mut file2 = FileData::get_file(path2)?;
let content1 = read_to_string(&path1)?.into_bytes();
let mut lines1 = Vec::new();
for line in (LineReader{ content: &content1 }) {
lines1.push(line);
}

let content2 = read_to_string(&path2)?.into_bytes();
let mut lines2 = Vec::new();
for line in (LineReader{ content: &content2 }) {
lines2.push(line);
}
let mut file1 = FileData::get_file(path1, lines1)?;
let mut file2 = FileData::get_file(path2, lines2)?;

let mut diff = FileDiff::new(&mut file1, &mut file2, format_options);

Expand Down Expand Up @@ -233,19 +244,19 @@ impl<'a> FileDiff<'a> {
// build histogram
let mut hist: HashMap<&str, Vec<i32>> = HashMap::new();
for i in x0..x1 {
if let Some(rec) = hist.get_mut(file1.line(i).as_str()) {
if let Some(rec) = hist.get_mut(file1.line(i)) {
rec[0] += 1_i32;
rec[1] = i as i32;
} else {
hist.insert(file1.line(i).as_str(), vec![1, i as i32, 0, -1]);
hist.insert(file1.line(i), vec![1, i as i32, 0, -1]);
}
}
for i in y0..y1 {
if let Some(rec) = hist.get_mut(file2.line(i).as_str()) {
if let Some(rec) = hist.get_mut(file2.line(i)) {
rec[2] += 1_i32;
rec[3] = i as i32;
} else {
hist.insert(file2.line(i).as_str(), vec![0, -1, 1, i as i32]);
hist.insert(file2.line(i), vec![0, -1, 1, i as i32]);
}
}

Expand Down