Skip to content

Commit

Permalink
Fixed missing images with similarity equal to 0
Browse files Browse the repository at this point in the history
  • Loading branch information
qarmin committed Aug 2, 2022
1 parent 7d654a7 commit 5dba0a8
Showing 1 changed file with 31 additions and 12 deletions.
43 changes: 31 additions & 12 deletions czkawka_core/src/similar_images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -729,13 +729,22 @@ impl SimilarImages {
};
//// PROGRESS THREAD END

for hash in &all_hashes {
self.bktree.add(hash.to_vec());
// Don't put hashes with multiple images into the bktree, because they will always be the master of a group and cannot be found by other hashes
let mut additional_chunk_to_check: Vec<_> = Default::default();
let mut hashes_with_multiple_images: HashSet<_> = Default::default(); // Fast way to check if a hash has multiple images
for (hash, vec_files) in &all_hashed_images {
if vec_files.len() >= 2 {
additional_chunk_to_check.push(hash);
hashes_with_multiple_images.insert(hash);
} else {
self.bktree.add(hash.to_vec());
}
}

let number_of_processors = num_cpus::get();
let chunk_size = all_hashes.len() / number_of_processors;
let chunks: Vec<_> = if chunk_size > 0 { all_hashes.chunks(chunk_size).collect() } else { vec![&all_hashes] };
let mut chunks: Vec<_> = if chunk_size > 0 { all_hashes.chunks(chunk_size).collect() } else { vec![&all_hashes] };
chunks.push(&additional_chunk_to_check);

let parts: Vec<_> = chunks
.into_par_iter()
Expand All @@ -753,14 +762,15 @@ impl SimilarImages {
for (index, hash_to_check) in hashes_to_check.iter().enumerate() {
// Don't check for user stop too often
// Also don't add data to variables too often
const CYCLES_COUNTER: usize = 50;
if index % CYCLES_COUNTER == 0 && index != 0 {
const CYCLES_COUNTER: usize = 0b111111;
if ((index & CYCLES_COUNTER) == CYCLES_COUNTER) && index != 0 {
atomic_mode_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed);
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_stopped.store(true, Ordering::Relaxed);
return None;
}
}
hashes_parents.insert(hash_to_check, 0);

let mut found_items = self
.bktree
Expand All @@ -772,9 +782,9 @@ impl SimilarImages {

for (similarity, other_hash) in found_items {
// SSSTART
// Cannot use hash if already is used as master record(have more than 0 children)
// Cannot use a hash if it is already used as a master record (has more than 0 children) or if the hash has more than one image
if let Some(children_number) = hashes_parents.get(other_hash) {
if *children_number > 0 {
if *children_number > 0 || hashes_with_multiple_images.contains(other_hash) {
continue;
}
}
Expand Down Expand Up @@ -815,7 +825,7 @@ impl SimilarImages {
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
*number_of_children += 1;
} else {
hashes_parents.insert(hash_to_check, 1);
panic!("This should never happen(At start item should be initialized with 0)");
}
}
// ENND
Expand Down Expand Up @@ -854,7 +864,7 @@ impl SimilarImages {
// SSSTART
// Cannot use hash if already is used as master record(have more than 0 children)
if let Some(children_number) = hashes_parents.get(other_hash) {
if *children_number > 0 {
if *children_number > 0 || hashes_with_multiple_images.contains(other_hash) {
continue;
}
}
Expand Down Expand Up @@ -895,7 +905,7 @@ impl SimilarImages {
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
*number_of_children += 1;
} else {
hashes_parents.insert(hash_to_check, 1);
hashes_parents.insert(hash_to_check, 1); // This line is different than in first algorithm because at start hashes without children are not zeroed as before
}
}
// ENND
Expand All @@ -906,9 +916,9 @@ impl SimilarImages {
debug_check_for_duplicated_things(hashes_parents.clone(), hashes_similarity.clone(), all_hashed_images.clone(), "LATTER");

// Collecting results

for (parent_hash, child_number) in hashes_parents {
if child_number > 0 {
// If the hash contains other hashes OR multiple images are available for the checked hash
if child_number > 0 || hashes_with_multiple_images.contains(parent_hash) {
let vec_fe = all_hashed_images.get(parent_hash).unwrap().clone();
collected_similar_images.insert(parent_hash.clone(), vec_fe);
}
Expand Down Expand Up @@ -1368,19 +1378,22 @@ fn debug_check_for_duplicated_things(
all_hashed_images: HashMap<Vec<u8>, Vec<FileEntry>>,
numm: &str,
) {
let mut found_broken_thing = false;
let mut hashmap_hashes: HashSet<_> = Default::default();
let mut hashmap_names: HashSet<_> = Default::default();
for (hash, number_of_children) in &hashes_parents {
if *number_of_children > 0 {
if hashmap_hashes.contains(*hash) {
println!("------1--HASH--{} {:?}", numm, all_hashed_images.get(*hash).unwrap());
found_broken_thing = true;
}
hashmap_hashes.insert(hash.to_vec());

for i in all_hashed_images.get(*hash).unwrap() {
let name = i.path.to_string_lossy().to_string();
if hashmap_names.contains(&name) {
println!("------1--NAME--{} {:?}", numm, name);
found_broken_thing = true;
}
hashmap_names.insert(name);
}
Expand All @@ -1389,15 +1402,21 @@ fn debug_check_for_duplicated_things(
for hash in hashes_similarity.keys() {
if hashmap_hashes.contains(*hash) {
println!("------2--HASH--{} {:?}", numm, all_hashed_images.get(*hash).unwrap());
found_broken_thing = true;
}
hashmap_hashes.insert(hash.to_vec());

for i in all_hashed_images.get(*hash).unwrap() {
let name = i.path.to_string_lossy().to_string();
if hashmap_names.contains(&name) {
println!("------2--NAME--{} {:?}", numm, name);
found_broken_thing = true;
}
hashmap_names.insert(name);
}
}

if found_broken_thing {
panic!();
}
}

0 comments on commit 5dba0a8

Please sign in to comment.