forked from westonplatter/phashion
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_phashion.rb
245 lines (191 loc) · 8.48 KB
/
test_phashion.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
require 'helper'
require 'sqlite3'
require 'tempfile'
class TestPhashion < Minitest::Test
# Writes a small text file, hashes it twice with Phashion.texthash_for and
# checks that both results are non-empty collections of
# Phashion::TextHashPoint. The two results are then matched against each
# other and the match list must be a non-empty collection of
# Phashion::TextMatch.
def test_text_hash
  text_matches = Tempfile.open('foo') do |file|
    100.times { |n| file.write "hello world #{n}" }
    # Closed before hashing by path -- presumably so buffered writes are
    # flushed to disk first.
    file.close

    first, second = Array.new(2) { Phashion.texthash_for file.path }
    [first, second].each { |points| assert_operator points.length, :>, 0 }
    [first, second].each do |points|
      points.each { |point| assert_instance_of Phashion::TextHashPoint, point }
    end

    # The block's value becomes the value of Tempfile.open.
    Phashion.textmatches_for(first, second)
  end

  assert_operator text_matches.length, :>, 0
  text_matches.each { |found| assert_instance_of Phashion::TextMatch, found }
end
# Splits an integer into two 32-bit halves.
#
# @param hash [Integer] value to split; bits above the low 64 are discarded
# @return [Array<Integer>] `[high, low]`, i.e. bits 32..63 followed by bits 0..31
def split(hash)
  mask = 0xFFFFFFFF
  [(hash >> 32) & mask, hash & mask]
end
# Loads the Phashion shared object into an in-memory SQLite database as an
# extension and checks that calling the SQL function hamming_distance with
# four text arguments yields a single row containing 0.
def test_db_bad_arg
  db = SQLite3::Database.new ':memory:'
  # Report the test as skipped instead of letting it pass silently when the
  # database object cannot load extensions.
  skip 'SQLite3::Database does not respond to enable_load_extension' unless db.respond_to? :enable_load_extension

  db.enable_load_extension true
  db.load_extension Phashion.so_file

  rows = db.execute "SELECT hamming_distance('foo', 'bar', 'baz', 'zot')"
  assert_equal [[0]], rows
end
# Checks that the SQL function hamming_distance provided by the Phashion
# SQLite extension agrees with Phashion.hamming_distance.
#
# The first image hash is stored as two 32-bit halves in an "images" table;
# the second hash's halves are passed as literals in the query.
def test_db_extension
  db = SQLite3::Database.new ':memory:'
  # Report the test as skipped instead of letting it pass silently when the
  # database object cannot load extensions.
  skip 'SQLite3::Database does not respond to enable_load_extension' unless db.respond_to? :enable_load_extension

  db.enable_load_extension true
  db.load_extension Phashion.so_file

  db.execute <<-SQL
CREATE TABLE "images" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
"fingerprint_l" integer NOT NULL,
"fingerprint_r" integer NOT NULL)
SQL

  jpg = relative_path '/jpg/Broccoli_Super_Food.jpg'
  png = relative_path '/png/Broccoli_Super_Food.png'

  hash1 = Phashion.image_hash_for jpg
  hash2 = Phashion.image_hash_for png

  # Each hash is split into two 32-bit halves before being handed to SQLite.
  l, r = split hash1
  db.execute "INSERT INTO images (fingerprint_l, fingerprint_r) VALUES (#{l}, #{r})"

  expected = Phashion.hamming_distance hash1, hash2

  l, r = split hash2
  rows = db.execute "SELECT hamming_distance(fingerprint_l, fingerprint_r, #{l}, #{r}) FROM images"
  assert_equal expected, rows.first.first
end
# Computes Phashion.mh_hash_for for the JPG and PNG versions of the broccoli
# image, checks that both results are Arrays, and checks that
# Phashion.hamming_distance2 between them is within 0.033 of 0.100.
def test_mh_hash_for
  jpg_hash = Phashion.mh_hash_for(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  png_hash = Phashion.mh_hash_for(relative_path('/png/Broccoli_Super_Food.png'))

  [jpg_hash, png_hash].each { |mh| assert_kind_of Array, mh }

  distance = Phashion.hamming_distance2(jpg_hash, png_hash)
  assert_in_delta 0.100, distance, 0.033
end
# Checks that Image#mh_distance_from between the JPG and PNG versions of the
# broccoli image is within 0.033 of 0.100.
def test_mh_distance_from
  jpg_image = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  png_image = Phashion::Image.new(relative_path('/png/Broccoli_Super_Food.png'))

  assert_in_delta 0.100, jpg_image.mh_distance_from(png_image), 0.033
end
# Checks that the three 86x86 JPEG fixtures are all reported as duplicates
# of one another.
def test_duplicate_detection
  dir = relative_path '/../test/jpg/'
  names = %w[86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg]
  images = names.map { |name| Phashion::Image.new("#{dir}#{name}") }

  # Index pairs, compared in this order.
  [[0, 1], [1, 2], [0, 2]].each do |i, j|
    assert_duplicate images[i], images[j]
  end
end
# Checks that a second set of three JPEG fixtures are all reported as
# duplicates of one another.
def test_duplicate_detection_2
  dir = relative_path '/../test/jpg/'
  names = %w[
    b32aade8c590e2d776c24f35868f0c7a588f51e1.jpeg
    df9cc82f5b32d7463f36620c61854fde9d939f7f.jpeg
    e7397898a7e395c2524978a5e64de0efabf08290.jpeg
  ]
  images = names.map { |name| Phashion::Image.new("#{dir}#{name}") }

  # Index pairs, compared in this order.
  [[0, 1], [1, 2], [0, 2]].each do |i, j|
    assert_duplicate images[i], images[j]
  end
end
# Checks that none of the three 86x86 JPEG fixtures is reported as a
# duplicate of avatar.jpg.
def test_not_duplicate
  dir = relative_path '/../test/jpg/'
  names = %w[86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg avatar.jpg]
  *thumbnails, avatar = names.map { |name| Phashion::Image.new("#{dir}#{name}") }

  thumbnails.each { |thumbnail| assert_not_duplicate thumbnail, avatar }
end
# Checks that the JPG, PNG and GIF versions of the broccoli image are
# reported as duplicates of one another.
def test_multiple_types
  paths = [
    '/jpg/Broccoli_Super_Food.jpg',
    '/png/Broccoli_Super_Food.png',
    '/gif/Broccoli_Super_Food.gif'
  ]
  jpg, png, gif = paths.map { |path| Phashion::Image.new(relative_path(path)) }

  [[jpg, png], [gif, png], [jpg, gif]].each do |left, right|
    assert_duplicate left, right
  end
end
# Fingerprints four PNG images, two of which are the same file, and checks
# that exactly three distinct fingerprints result.
def test_fingerprint_png_is_different
  paths = [
    '/png/Broccoli_Super_Food.png',
    '/png/linux.png',
    '/png/grass.png',
    '/png/Broccoli_Super_Food.png' # same file as the first entry
  ]
  fingerprints = paths.map { |path| Phashion::Image.new(relative_path(path)).fingerprint }

  # assert_equal reports the actual count on failure; a bare boolean assert would not.
  assert_equal 3, fingerprints.uniq.size, 'array should contain 3 unique fingerprints'
end
# Checks Image#duplicate? with an explicit :threshold option against the
# 100px version of the broccoli image.
#
# NOTE: this relies on test_distance_from_smaller_jpg still asserting a
# distance of 2 between the same two files.
# NOTE: the threshold is a less-than-or-equal-to comparison, which is a
# change from version 1.0.8.
def test_duplicate_with_custom_distance_threshold
  original = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  smaller = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.100px.jpg'))

  refute original.duplicate?(smaller, threshold: 1)
  assert original.duplicate?(smaller, threshold: 2)
end
# Checks the dupe_at_threshold_N? predicates on Image for N = 1 and N = 2,
# and that dupe_at_threshold_100? raises NoMethodError.
#
# NOTE: this relies on test_distance_from_smaller_jpg still asserting a
# distance of 2 between the same two files.
# NOTE: the threshold is a less-than-or-equal-to comparison, which is a
# change from version 1.0.8.
def test_duplicate_meta_methods
  original = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  smaller = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.100px.jpg'))

  refute original.dupe_at_threshold_1?(smaller)
  assert original.dupe_at_threshold_2?(smaller)
  assert_raises(NoMethodError) do
    original.dupe_at_threshold_100?(smaller)
  end
end
### distance methods
# Checks that the JPG and PNG versions of the broccoli image are at
# distance 0 from each other.
def test_distance_from_jpg_to_png_dupe
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  png = Phashion::Image.new(relative_path('/png/Broccoli_Super_Food.png'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 0, jpg.distance_from(png)
end
# Checks that the broccoli JPG and its "lossy" variant are at distance 0
# from each other.
def test_distance_from_lossy_jpg
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.lossy.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 0, jpg.distance_from(jpg_x)
end
# Checks that the broccoli JPG and its 100px variant are at distance 2 from
# each other.
def test_distance_from_smaller_jpg
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.100px.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 2, jpg.distance_from(jpg_x)
end
# Checks that the broccoli JPG and its color-corrected variant are at
# distance 2 from each other.
def test_distance_from_color_correction
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.color-corrected.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 2, jpg.distance_from(jpg_x)
end
# Checks that the broccoli JPG and its "bw" variant are at distance 2 from
# each other.
def test_distance_from_black_and_white
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.bw.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 2, jpg.distance_from(jpg_x)
end
# Checks that the broccoli JPG and its bounding-box variant are at distance
# 12 from each other.
#
# Control-image is cropped to remove empty whitespace around image details
# from 500x349 to 466x312.
def test_distance_from_bounding_box
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.bounding-box.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 12, jpg.distance_from(jpg_x)
end
# Checks that the broccoli JPG and its "rotate5cw" variant are at distance
# 14 from each other.
def test_distance_from_rotation_of_5degrees_c2
  jpg = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  jpg_x = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.rotate5cw.jpg'))

  # Minitest takes the expected value first, so failures label the values correctly.
  assert_equal 14, jpg.distance_from(jpg_x)
end
# Checks that the distance between the broccoli JPG and its horizontally
# flipped variant is greater than Phashion::DEFAULT_DUPE_THRESHOLD.
def test_distance_from_horizontal_flip
  original = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  flipped = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.horizontal-flip.jpg'))

  distance = original.distance_from(flipped)
  assert_operator distance, :>, Phashion::DEFAULT_DUPE_THRESHOLD
end
# Checks that the broccoli JPG and its "bw" variant are reported as
# duplicates through the assert_duplicate_with_module_method helper.
def test_duplicate_with_module_method
  original = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.jpg'))
  black_and_white = Phashion::Image.new(relative_path('/jpg/Broccoli_Super_Food.bw.jpg'))

  assert_duplicate_with_module_method original, black_and_white
end
private
# Builds a path by appending +path+ to the directory of this file.
#
# @param path [String] suffix to append; no separator is inserted, so it
#   should start with "/"
# @return [String] the directory of this file followed by +path+
def relative_path(path)
  test_dir = File.dirname(__FILE__)
  "#{test_dir}#{path}"
end
# Asserts that +a+ reports +b+ as a duplicate.
#
# @param a [#duplicate?, #filename] image asked for the verdict
# @param b [#filename] image passed to +a.duplicate?+
def assert_duplicate(a, b)
  is_dupe = a.duplicate?(b)
  assert is_dupe, "#{a.filename} not dupe of #{b.filename}"
end
# Asserts that +a+ does not report +b+ as a duplicate.
#
# @param a [#duplicate?, #filename] image asked for the verdict
# @param b [#filename] image passed to +a.duplicate?+
def assert_not_duplicate(a, b)
  # refute is the Minitest form of "assert !...".
  refute a.duplicate?(b), "#{a.filename} dupe of #{b.filename}"
end
# Asserts that Phashion.duplicate? reports the fingerprints of +a+ and +b+
# as duplicates.
#
# @param a [#fingerprint, #filename] first image
# @param b [#fingerprint, #filename] second image
def assert_duplicate_with_module_method(a, b)
  same = Phashion.duplicate?(a.fingerprint, b.fingerprint)
  assert same, "#{a.filename} not dupe of #{b.filename}"
end
end