Skip to content

Commit

Permalink
Add finding similar audio by content (#970)
Browse files Browse the repository at this point in the history
* In prehashing, check for user clicks less often

* Similar audio

* Remove ugly time checking

* Fix using cache

* Fix cache and improve performance of validating items

* Remove cache type - cache should be saved to two different cache files

* Working

* Simple multithreading

* Basic Generalization

* Reference folder and swap cleaning

* Split into multiple files

* Commons, improved GUI message

* Simplifying thread run

* Check was stopped

* Fix checking same files

* Make read single file tag more general

* Remove unnecessary clone

* Reading tags

* Base

* Search

* Gui Fix

* Gui Fix

* Tooltip
  • Loading branch information
qarmin authored May 7, 2023
1 parent 14ccb49 commit 78d00ee
Show file tree
Hide file tree
Showing 32 changed files with 1,880 additions and 1,592 deletions.
52 changes: 26 additions & 26 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions czkawka_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pdf = "0.8"

# Needed by audio similarity feature
rusty-chromaprint = "0.1"
symphonia = { version = "0.5", features = ["mp3", "aac", "alac", "flac", "isomp4", "mkv", "ogg", "pcm", "vorbis", "wav"] }
symphonia = { version = "0.5", features = ["all"] }

# Hashes for duplicate files
blake3 = "1.3"
Expand Down Expand Up @@ -74,10 +74,10 @@ num_cpus = "1.15"

# Heif/Heic
libheif-rs = { version = "0.18.0", optional = true } # Do not upgrade for now, since Ubuntu 22.04 does not work with newer versions
anyhow = { version = "1.0", optional = true }
anyhow = { version = "1.0" }

state = "0.5"

[features]
default = []
heif = ["dep:libheif-rs", "dep:anyhow"]
heif = ["dep:libheif-rs"]
39 changes: 7 additions & 32 deletions czkawka_core/src/bad_extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ use std::mem;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::SystemTime;

use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use mime_guess::get_mime_extensions;
use rayon::prelude::*;

use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, Common};
use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData};
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
Expand Down Expand Up @@ -298,16 +297,12 @@ impl BadExtensions {
.build()
.run();
match result {
DirTraversalResult::SuccessFiles {
start_time,
grouped_file_entries,
warnings,
} => {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
if let Some(files_to_check) = grouped_file_entries.get(&()) {
self.files_to_check = files_to_check.clone();
}
self.text_messages.warnings.extend(warnings);
Common::print_time(start_time, SystemTime::now(), "check_files");

true
}
DirTraversalResult::SuccessFolders { .. } => {
Expand All @@ -318,24 +313,10 @@ impl BadExtensions {
}

fn look_for_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
let system_time = SystemTime::now();

let check_was_stopped = AtomicBool::new(false); // Used for breaking from GUI and ending check thread

let progress_thread_run = Arc::new(AtomicBool::new(true));
let atomic_counter = Arc::new(AtomicUsize::new(0));
let progress_thread_handle = prepare_thread_handler_common(
progress_sender,
&progress_thread_run,
&atomic_counter,
1,
1,
self.files_to_check.len(),
CheckingMethod::None,
);
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 1, self.files_to_check.len(), CheckingMethod::None);

let mut files_to_check = Default::default();
mem::swap(&mut files_to_check, &mut self.files_to_check);
let files_to_check = mem::take(&mut self.files_to_check);

let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default();
for (proper, found) in WORKAROUNDS {
Expand All @@ -357,8 +338,6 @@ impl BadExtensions {

self.information.number_of_files_with_bad_extension = self.bad_extensions_files.len();

Common::print_time(system_time, SystemTime::now(), "bad extension finding");

// Clean unused data
self.files_to_check = Default::default();

Expand Down Expand Up @@ -525,7 +504,6 @@ impl DebugPrint for BadExtensions {

impl SaveResults for BadExtensions {
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
Expand Down Expand Up @@ -557,7 +535,7 @@ impl SaveResults for BadExtensions {
} else {
write!(writer, "Not found any files with invalid extension.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file");

true
}
}
Expand All @@ -566,12 +544,9 @@ impl PrintResults for BadExtensions {
/// Print information about files with invalid extensions
/// Only needed for CLI
fn print_results(&self) {
let start_time: SystemTime = SystemTime::now();
println!("Found {} files with invalid extension.\n", self.information.number_of_files_with_bad_extension);
for file_entry in &self.bad_extensions_files {
println!("{} ----- {}", file_entry.path.display(), file_entry.proper_extensions);
}

Common::print_time(start_time, SystemTime::now(), "print_entries");
}
}
19 changes: 3 additions & 16 deletions czkawka_core/src/big_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@ use std::fs;
use std::fs::{DirEntry, File, Metadata};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::SystemTime;

use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use humansize::format_size;
use humansize::BINARY;
use rayon::prelude::*;

use crate::common::Common;
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData};
use crate::common_directory::Directories;
Expand Down Expand Up @@ -142,7 +140,6 @@ impl BigFile {
}

fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
let start_time: SystemTime = SystemTime::now();
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough to not make too big a difference and big enough to store most paths without needing to resize the vector
let mut old_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();

Expand All @@ -151,9 +148,7 @@ impl BigFile {
folders_to_check.push(id.clone());
}

let progress_thread_run = Arc::new(AtomicBool::new(true));
let atomic_counter = Arc::new(AtomicUsize::new(0));
let progress_thread_handle = prepare_thread_handler_common(progress_sender, &progress_thread_run, &atomic_counter, 0, 0, 0, CheckingMethod::None);
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = prepare_thread_handler_common(progress_sender, 0, 0, 0, CheckingMethod::None);

while !folders_to_check.is_empty() {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
Expand Down Expand Up @@ -213,7 +208,6 @@ impl BigFile {

self.extract_n_biggest_files(old_map);

Common::print_time(start_time, SystemTime::now(), "look_for_big_files");
true
}

Expand Down Expand Up @@ -308,8 +302,6 @@ impl BigFile {

/// Function to delete files from the filled vector
fn delete_files(&mut self) {
let start_time: SystemTime = SystemTime::now();

match self.delete_method {
DeleteMethod::Delete => {
for (_, file_entry) in &self.big_files {
Expand All @@ -322,8 +314,6 @@ impl BigFile {
//Just do nothing
}
}

Common::print_time(start_time, SystemTime::now(), "delete_files");
}
}

Expand Down Expand Up @@ -365,7 +355,6 @@ impl DebugPrint for BigFile {
impl SaveResults for BigFile {
/// Saving results to provided file
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
Expand Down Expand Up @@ -401,14 +390,13 @@ impl SaveResults for BigFile {
} else {
write!(writer, "Not found any files.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file");

true
}
}

impl PrintResults for BigFile {
fn print_results(&self) {
let start_time: SystemTime = SystemTime::now();
if self.search_mode == SearchMode::BiggestFiles {
println!("{} the biggest files.\n\n", self.information.number_of_real_files);
} else {
Expand All @@ -417,6 +405,5 @@ impl PrintResults for BigFile {
for (size, file_entry) in &self.big_files {
println!("{} ({}) - {}", format_size(*size, BINARY), size, file_entry.path.display());
}
Common::print_time(start_time, SystemTime::now(), "print_entries");
}
}
Loading

0 comments on commit 78d00ee

Please sign in to comment.