Skip to content

Commit cacbb7f

Browse files
author
Andy Ferris
committed
Make unique(f, itr) and unique!(f, itr) faster
Avoid creation of a `Set{Any}`.
1 parent 5cbbed3 commit cacbb7f

File tree

1 file changed

+57
-16
lines changed

1 file changed

+57
-16
lines changed

base/set.jl

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,39 @@ julia> unique(x -> x^2, [1, -1, 3, -3, 4])
167167
"""
168168
function unique(f, C)
169169
out = Vector{eltype(C)}()
170-
seen = Set()
171-
for x in C
170+
171+
s = iterate(C)
172+
if s === nothing
173+
return out
174+
end
175+
(x, i) = s
176+
y = f(x)
177+
seen = Set{typeof(y)}()
178+
push!(seen, y)
179+
push!(out, x)
180+
181+
return _unique!(f, out, C, seen, i)
182+
end
183+
184+
function _unique!(f, out::AbstractVector, C, seen::Set, i)
185+
s = iterate(C, i)
186+
while s !== nothing
187+
(x, i) = s
172188
y = f(x)
173-
if !in(y, seen)
174-
push!(seen, y)
189+
if y ∉ seen
175190
push!(out, x)
191+
if y isa eltype(seen)
192+
push!(seen, y)
193+
else
194+
seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
195+
push!(seen2, y)
196+
return _unique!(f, out, C, seen2, i)
197+
end
176198
end
199+
s = iterate(C, i)
177200
end
178-
out
201+
202+
return out
179203
end
180204

181205
"""
@@ -205,22 +229,39 @@ julia> unique!(iseven, [2, 3, 5, 7, 9])
205229
```
206230
"""
207231
function unique!(f, A::AbstractVector)
208-
seen = Set()
209-
idxs = eachindex(A)
210-
y = iterate(idxs)
211-
count = 0
212-
for x in A
213-
t = f(x)
214-
if t ∉ seen
215-
push!(seen,t)
232+
if length(A) <= 1
233+
return A
234+
end
235+
236+
i = firstindex(A)
237+
x = @inbounds A[i]
238+
y = f(x)
239+
seen = Set{typeof(y)}()
240+
push!(seen, y)
241+
return _unique!(f, A, seen, 1, i+1)
242+
end
243+
244+
function _unique!(f, A::AbstractVector, seen::Set, count::Integer, i::Integer)
245+
while i <= lastindex(A)
246+
x = @inbounds A[i]
247+
y = f(x)
248+
if y ∉ seen
216249
count += 1
217-
A[y[1]] = x
218-
y = iterate(idxs, y[2])
250+
@inbounds A[count] = x
251+
if y isa eltype(seen)
252+
push!(seen, y)
253+
else
254+
seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
255+
push!(seen2, y)
256+
return _unique!(f, A, seen2, count, i+1)
257+
end
219258
end
259+
i += 1
220260
end
221-
resize!(A, count)
261+
return resize!(A, count)
222262
end
223263

264+
224265
# If A is not grouped, then we will need to keep track of all of the elements that we have
225266
# seen so far.
226267
_unique!(A::AbstractVector) = unique!(identity, A::AbstractVector)

0 commit comments

Comments
 (0)