Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cache for broken files #204

Merged
merged 1 commit into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion czkawka_cli/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ pub struct AllowedExtensions {
short = "x",
long,
help = "Allowed file extension(s)",
long_help = "List of checked files with provided extension(s). There are also helpful macros which allow to easy use a typical extensions like:\nIMAGE(\"jpg,kra,gif,png,bmp,tiff,webp,hdr,svg\"),\nTEXT(\"txt,doc,docx,odt,rtf\"),\nVIDEO(\"mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp\") or\nMUSIC(\"mp3,flac,ogg,tta,wma,webm\")\n "
long_help = "List of checked files with provided extension(s). There are also helpful macros which allow to easy use a typical extensions like:\nIMAGE(\"jpg,kra,gif,png,bmp,tiff,hdr,svg\"),\nTEXT(\"txt,doc,docx,odt,rtf\"),\nVIDEO(\"mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp\") or\nMUSIC(\"mp3,flac,ogg,tta,wma,webm\")\n "
)]
pub allowed_extensions: Vec<String>,
}
Expand Down
234 changes: 202 additions & 32 deletions czkawka_core/src/broken_files.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::fs::{File, Metadata};
use std::fs::{File, Metadata, OpenOptions};
use std::io::prelude::*;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::{fs, thread};

Expand All @@ -11,12 +11,16 @@ use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::*;
use crossbeam_channel::Receiver;
use directories_next::ProjectDirs;
use rayon::prelude::*;
use std::io::BufWriter;
use std::collections::HashMap;
use std::io::{BufReader, BufWriter};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread::sleep;

const CACHE_FILE_NAME: &str = "cache_broken_files.txt";

#[derive(Debug)]
pub struct ProgressData {
pub current_stage: u8,
Expand All @@ -35,13 +39,15 @@ pub enum DeleteMethod {
/// One file picked up by the scan, together with the outcome of checking it.
/// The same record is what gets written to and read back from the cache file.
pub struct FileEntry {
/// Location of the file on disk.
pub path: PathBuf,
/// Modification timestamp; compared against the cached value to detect changes.
/// NOTE(review): presumably seconds since the UNIX epoch - confirm against the scan code.
pub modified_date: u64,
/// File length as reported by `Metadata::len()`; compared against the cached value to detect changes.
pub size: u64,
/// Kind of content, derived from the file extension.
pub type_of_file: TypeOfFile,
/// Empty when no problem was found, otherwise the error text reported when opening the file failed.
pub error_string: String,
}

#[derive(Clone, PartialEq, Eq)]
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum TypeOfFile {
Image,
Unknown = -1,
Image = 0,
}

/// Info struct with helpful information about results
Expand All @@ -61,7 +67,7 @@ impl Info {
pub struct BrokenFiles {
text_messages: Messages,
information: Info,
files_to_check: Vec<FileEntry>,
files_to_check: HashMap<String, FileEntry>,
broken_files: Vec<FileEntry>,
directories: Directories,
allowed_extensions: Extensions,
Expand All @@ -80,10 +86,10 @@ impl BrokenFiles {
allowed_extensions: Extensions::new(),
directories: Directories::new(),
excluded_items: ExcludedItems::new(),
files_to_check: vec![],
files_to_check: Default::default(),
delete_method: DeleteMethod::None,
stopped_search: false,
broken_files: vec![],
broken_files: Default::default(),
}
}

Expand Down Expand Up @@ -232,13 +238,8 @@ impl BrokenFiles {
}
.to_lowercase();

let type_of_file;

// Checking allowed image extensions
let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "webp", "tiff", "pnm", "tga", "ff", "gif"];
if allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(format!(".{}", e).as_str())) {
type_of_file = TypeOfFile::Image;
} else {
let type_of_file = check_extension_avaibility(&file_name_lowercase);
if type_of_file == TypeOfFile::Unknown {
continue 'dir;
}

Expand Down Expand Up @@ -273,12 +274,13 @@ impl BrokenFiles {
continue;
} // Permissions Denied
},
size: metadata.len(),
type_of_file,
error_string: "".to_string(),
};

// Adding files to Vector
self.files_to_check.push(fe);
self.files_to_check.insert(fe.path.to_string_lossy().to_string(), fe);
}
}
}
Expand All @@ -292,6 +294,29 @@ impl BrokenFiles {
fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::Sender<ProgressData>>) -> bool {
let system_time = SystemTime::now();

let loaded_hash_map = match load_cache_from_file(&mut self.text_messages) {
Some(t) => t,
None => Default::default(),
};

let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: HashMap<String, FileEntry> = Default::default();
for (name, file_entry) in &self.files_to_check {
#[allow(clippy::collapsible_if)]
if !loaded_hash_map.contains_key(name) {
// If loaded data doesn't contains current image info
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
} else {
if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
// When size or modification date of image changed, then it is clear that is different image
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
} else {
// Checking may be omitted when already there is entry with same size and modification date
records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
}
}
}

let check_was_breaked = AtomicBool::new(false); // Used for breaking from GUI and ending check thread

//// PROGRESS THREAD START
Expand All @@ -304,7 +329,7 @@ impl BrokenFiles {
let mut progress_send = progress_sender.clone();
let progress_thread_run = progress_thread_run.clone();
let atomic_file_counter = atomic_file_counter.clone();
let files_to_check = self.files_to_check.len();
let files_to_check = non_cached_files_to_check.len();
progress_thread_handle = thread::spawn(move || loop {
progress_send
.try_send(ProgressData {
Expand All @@ -323,23 +348,34 @@ impl BrokenFiles {
progress_thread_handle = thread::spawn(|| {});
}
//// PROGRESS THREAD END
self.broken_files = self
.files_to_check
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
.par_iter()
.map(|file_entry| {
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// This will not break
check_was_breaked.store(true, Ordering::Relaxed);
return None;
}

match image::open(&file_entry.path) {
Ok(_) => Some(None),
Err(t) => {
let mut file_entry = file_entry.clone();
file_entry.error_string = t.to_string();
Some(Some(file_entry))
} // Something is wrong with image
match file_entry.1.type_of_file {
TypeOfFile::Image => {
match image::open(&file_entry.1.path) {
Ok(_) => Some(None),
Err(t) => {
let error_string = t.to_string();
// This error is a problem with image library, remove check when https://github.com/image-rs/jpeg-decoder/issues/130 will be fixed
if !error_string.contains("spectral selection is not allowed in non-progressive scan") {
let mut file_entry = file_entry.1.clone();
file_entry.error_string = error_string;
Some(Some(file_entry))
} else {
Some(None)
}
} // Something is wrong with image
}
}
// This means that cache read invalid value because maybe cache comes from different czkawka version
TypeOfFile::Unknown => Some(None),
}
})
.while_some()
Expand All @@ -351,16 +387,35 @@ impl BrokenFiles {
progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();

self.information.number_of_broken_files = self.broken_files.len();

// Check if user aborted search(only from GUI)
// Break if stop was clicked
if check_was_breaked.load(Ordering::Relaxed) {
return false;
}

// Just connect loaded results with already calculated
for (_name, file_entry) in records_already_cached {
vec_file_entry.push(file_entry.clone());
}

self.broken_files = vec_file_entry.iter().filter_map(|f| if f.error_string.is_empty() { None } else { Some(f.clone()) }).collect();

// Must save all results to file, old loaded from file with all currently counted results
let mut all_results: HashMap<String, FileEntry> = self.files_to_check.clone();

for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_cache_to_file(&all_results, &mut self.text_messages);

self.information.number_of_broken_files = self.broken_files.len();

Common::print_time(system_time, SystemTime::now(), "sort_images - reading data from files in parallel".to_string());

// Clean data
self.files_to_check = vec![];
self.files_to_check = Default::default();

true
}
Expand All @@ -370,7 +425,7 @@ impl BrokenFiles {

match self.delete_method {
DeleteMethod::Delete => {
for file_entry in &self.files_to_check {
for file_entry in self.broken_files.iter() {
if fs::remove_file(&file_entry.path).is_err() {
self.text_messages.warnings.push(file_entry.path.display().to_string());
}
Expand Down Expand Up @@ -472,3 +527,118 @@ impl PrintResults for BrokenFiles {
Common::print_time(start_time, SystemTime::now(), "print_entries".to_string());
}
}

/// Writes the broken-files cache to `CACHE_FILE_NAME` inside the per-user cache directory.
///
/// Every entry whose size is greater than 1024 bytes is stored on its own line as
/// `path//size//modified_date//error_string`; smaller files are not cached.
/// Any previous content of the cache file is discarded.
///
/// Failures (cache dir is a file, dir or file cannot be created, write or flush error) are
/// reported through `text_messages.messages`; the function returns nothing. When the project
/// directories cannot be determined the function does nothing.
fn save_cache_to_file(hashmap_file_entry: &HashMap<String, FileEntry>, text_messages: &mut Messages) {
    println!("Allowed to save {} entries", hashmap_file_entry.len());
    if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
        // Lin: /home/username/.cache/czkawka
        // Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
        // Mac: /Users/Username/Library/Caches/pl.Qarmin.Czkawka

        let cache_dir = PathBuf::from(proj_dirs.cache_dir());
        if cache_dir.exists() {
            if !cache_dir.is_dir() {
                text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
                return;
            }
        } else if fs::create_dir_all(&cache_dir).is_err() {
            text_messages.messages.push(format!("Cannot create config dir {}", cache_dir.display()));
            return;
        }

        let cache_file = cache_dir.join(CACHE_FILE_NAME);
        let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
            Ok(t) => t,
            Err(_) => {
                text_messages.messages.push(format!("Cannot create or open cache file {}", cache_file.display()));
                return;
            }
        };
        let mut writer = BufWriter::new(file_handler);

        for file_entry in hashmap_file_entry.values() {
            // Only save to cache files which have more than 1KB
            if file_entry.size <= 1024 {
                continue;
            }

            let write_result = writeln!(
                writer,
                "{}//{}//{}//{}",
                file_entry.path.display(),
                file_entry.size,
                file_entry.modified_date,
                file_entry.error_string
            );
            if write_result.is_err() {
                text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
                return;
            }
        }

        // Dropping a BufWriter ignores flush errors, so flush explicitly to be able to
        // report a cache file that was only partially written.
        if writer.flush().is_err() {
            text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
        }
    }
}

/// Loads the broken-files cache from `CACHE_FILE_NAME` inside the per-user cache directory.
///
/// Each line is expected to look like `path//size//modified_date//error_string`.
/// The line is split into at most four fields, so an error string that itself contains
/// `//` is kept intact instead of invalidating the whole line.
///
/// Returns `None` when the project directories cannot be determined, when the cache file
/// cannot be opened, or when reading a line fails. Otherwise returns a map keyed by the
/// path text stored in the cache. Entries whose file no longer exists are dropped silently;
/// malformed lines are skipped with a warning in `text_messages.warnings`.
fn load_cache_from_file(text_messages: &mut Messages) -> Option<HashMap<String, FileEntry>> {
    let proj_dirs = match ProjectDirs::from("pl", "Qarmin", "Czkawka") {
        Some(t) => t,
        None => {
            text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
            return None;
        }
    };

    let cache_dir = PathBuf::from(proj_dirs.cache_dir());
    let cache_file = cache_dir.join(CACHE_FILE_NAME);
    let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
        Ok(t) => t,
        // Deliberately silent: failing to open the cache file is not reported to the user
        Err(_) => return None,
    };

    let reader = BufReader::new(file_handler);
    let mut hashmap_loaded_entries: HashMap<String, FileEntry> = Default::default();

    for (index, line) in reader.lines().enumerate() {
        let line_number = index + 1;
        let line = match line {
            Ok(t) => t,
            Err(_) => {
                text_messages.warnings.push(format!("Failed to load line number {} from cache file {}", line_number, cache_file.display()));
                return None;
            }
        };

        // At most 4 parts: everything after the third separator belongs to the error string
        let fields: Vec<&str> = line.splitn(4, "//").collect();
        if fields.len() != 4 {
            text_messages.warnings.push(format!("Found invalid data in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
            continue;
        }

        // Don't load cache data if destination file not exists
        if !Path::new(fields[0]).exists() {
            continue;
        }

        let size = match fields[1].parse::<u64>() {
            Ok(t) => t,
            Err(_) => {
                text_messages.warnings.push(format!("Found invalid size value in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
                continue;
            }
        };
        let modified_date = match fields[2].parse::<u64>() {
            Ok(t) => t,
            Err(_) => {
                text_messages.warnings.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
                continue;
            }
        };

        hashmap_loaded_entries.insert(
            fields[0].to_string(),
            FileEntry {
                path: PathBuf::from(fields[0]),
                size,
                modified_date,
                // The type is not stored in the cache, it is derived again from the path
                type_of_file: check_extension_avaibility(&fields[0].to_lowercase()),
                error_string: fields[3].to_string(),
            },
        );
    }

    Some(hashmap_loaded_entries)
}

/// Classifies a file by the extension at the end of its name.
///
/// `file_name_lowercase` must already be lower-cased; matching is a plain suffix
/// comparison against `.<extension>`. Returns `TypeOfFile::Image` for a supported image
/// extension and `TypeOfFile::Unknown` for everything else.
fn check_extension_avaibility(file_name_lowercase: &str) -> TypeOfFile {
    // Suffixes include the leading dot so no string has to be formatted per comparison
    const ALLOWED_IMAGE_SUFFIXES: [&str; 10] = [".jpg", ".jpeg", ".png", ".bmp", ".ico", ".tiff", ".pnm", ".tga", ".ff", ".gif"];

    if ALLOWED_IMAGE_SUFFIXES.iter().any(|&suffix| file_name_lowercase.ends_with(suffix)) {
        TypeOfFile::Image
    } else {
        TypeOfFile::Unknown
    }
}
2 changes: 1 addition & 1 deletion czkawka_core/src/common_extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ impl Extensions {
if allowed_extensions.is_empty() {
return;
}
allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg");
allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,hdr,svg");
allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");
allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf");
Expand Down
4 changes: 2 additions & 2 deletions czkawka_core/src/similar_images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ impl SimilarImages {
.to_lowercase();

// Checking allowed image extensions
let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "webp", "tiff", "pnm", "tga", "ff", "gif"];
let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "tiff", "pnm", "tga", "ff", "gif"];
if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(format!(".{}", e).as_str())) {
continue 'dir;
}
Expand Down Expand Up @@ -693,7 +693,7 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<HashMap<String,
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
Ok(t) => t,
Err(_) => {
text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display()));
// text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
return None;
}
};
Expand Down
Loading