Skip to content

fix #32550: issetequal with duplicate values #32826

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 89 additions & 55 deletions base/abstractset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ sizehint!(s::AbstractSet, n) = nothing

copy!(dst::AbstractSet, src::AbstractSet) = union!(empty!(dst), src)

## set operations (union, intersection, symmetric difference)

"""
union(s, itrs...)
∪(s, itrs...)
Expand Down Expand Up @@ -77,11 +79,11 @@ max_values(::Type{Nothing}) = 1

function union!(s::AbstractSet{T}, itr) where T
haslength(itr) && sizehint!(s, length(s) + length(itr))
for x=itr
for x in itr
push!(s, x)
length(s) == max_values(T) && break
end
s
return s
end

"""
Expand Down Expand Up @@ -220,41 +222,16 @@ function symdiff!(s::AbstractSet, itr)
for x in itr
x in s ? delete!(s, x) : push!(s, x)
end
s
return s
end

==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
# convenience functions for AbstractSet
# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
<( l::AbstractSet, r::AbstractSet) = l ⊊ r
<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
## non-strict subset comparison

function issubset(l, r)
    # Membership tests against a long non-set collection are slow, so such an
    # `r` is converted to a `Set` first. The cutoff was determined empirically
    # by repeatedly sampling both approaches (see #26198).
    lenthresh = 70
    if haslength(r) && length(r) > lenthresh && !(r isa AbstractSet)
        return issubset(l, Set(r))
    end

    # every element of `l` has to be found in `r`
    for elt in l
        elt in r || return false
    end
    return true
end
# use the implementation below when it becomes as efficient
# issubset(l, r) = all(_in(r), l)
const ⊆ = issubset
⊇(l, r) = r ⊆ l
function ⊇ end
"""
issubset(a, b)
⊆(a,b) -> Bool
issubset(a, b) -> Bool
⊆(a, b) -> Bool
⊇(b, a) -> Bool

Determine whether every element of `a` is also in `b`, using [`in`](@ref).
Expand All @@ -273,29 +250,35 @@ true
"""
issubset, ⊆, ⊇

"""
issetequal(a, b)

Determine whether `a` and `b` have the same elements. Equivalent
to `a ⊆ b && b ⊆ a`.
function issubset(l, r)
    if haslength(r)
        nr = length(r)
        # a set `l` holds only unique elements, so it cannot be contained in
        # a collection that has fewer elements than `l` itself
        if l isa AbstractSet && length(l) > nr
            return false
        end
        # a long non-set `r` is converted to a Set before testing membership;
        # the cutoff was determined empirically by repeatedly sampling
        # both approaches (see #26198)
        if nr > 70 && !(r isa AbstractSet)
            return issubset(l, Set(r))
        end
    end
    # every element of `l` has to be found in `r`
    for x in l
        if !(x in r)
            return false
        end
    end
    return true
end

# Examples
```jldoctest
julia> issetequal([1, 2], [1, 2, 3])
false
# `l ⊇ r` is `r ⊆ l` with the arguments swapped
⊇(l, r) = r ⊆ l

julia> issetequal([1, 2], [2, 1])
true
```
"""
issetequal(l, r) = length(l) == length(r) && l ⊆ r
issetequal(l::AbstractSet, r::AbstractSet) = l == r
## strict subset comparison

⊊(l, r) = length(l) < length(r) && l ⊆ r
⊋(l, r) = r ⊊ l
function ⊊ end
function ⊋ end
"""
⊊(a, b)
⊋(b, a)
⊊(a, b) -> Bool
⊋(b, a) -> Bool

Determine whether `a` is a subset of, but not equal to, `b`.

Expand All @@ -310,11 +293,15 @@ false
"""
⊊, ⊋

⊈(l, r) = !⊆(l, r)
⊉(l, r) = r ⊈ l
# A strict-subset test compares element counts, which is only meaningful when
# both sides hold unique elements. Any argument that is not already an
# `AbstractSet` is therefore converted to a `Set` first, so that duplicates in
# either argument (e.g. `[1, 2] ⊊ [1, 2, 2]`) cannot inflate its length.
⊊(l::AbstractSet, r::AbstractSet) = length(l) < length(r) && l ⊆ r
⊊(l::AbstractSet, r) = l ⊊ Set(r)
⊊(l, r::AbstractSet) = Set(l) ⊊ r
⊊(l, r) = Set(l) ⊊ Set(r)
# `⊋` is `⊊` with the arguments swapped
⊋(l, r) = r ⊊ l

function ⊈ end
function ⊉ end
"""
⊈(a, b)
⊉(b, a)
⊈(a, b) -> Bool
⊉(b, a) -> Bool

Negation of `⊆` and `⊇`, i.e. checks that `a` is not a subset of `b`.

Expand All @@ -329,6 +316,53 @@ false
"""
⊈, ⊉

# `⊈` negates `⊆`; `⊉` is the same test with the arguments swapped
⊈(l, r) = !⊆(l, r)
⊉(l, r) = r ⊈ l

## set equality comparison

"""
issetequal(a, b) -> Bool

Determine whether `a` and `b` have the same elements. Equivalent
to `a ⊆ b && b ⊆ a` but more efficient when possible.

# Examples
```jldoctest
julia> issetequal([1, 2], [1, 2, 3])
false

julia> issetequal([1, 2], [2, 1])
true
```
"""
issetequal(l::AbstractSet, r::AbstractSet) = l == r
# only the right-hand side needs converting before two sets can be compared
issetequal(l::AbstractSet, r) = issetequal(l, Set(r))

function issetequal(l, r::AbstractSet)
    # `r` holds only unique elements, so an `l` with fewer items can never
    # contain all of them
    if haslength(l) && length(l) < length(r)
        return false
    end
    return issetequal(Set(l), r)
end

function issetequal(l, r)
    # keep whichever side has a known length unconverted, so that the
    # set-on-the-right method can still apply its cheap length check;
    # without any known length both sides are turned into sets
    if haslength(l)
        return issetequal(l, Set(r))
    elseif haslength(r)
        return issetequal(r, Set(l))
    end
    return issetequal(Set(l), Set(r))
end

## partial ordering of sets by containment

# Two sets are equal when they have the same number of elements and every
# element of `l` is also in `r`.
==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
# convenience functions for AbstractSet: `<` is defined as `⊊` and `<=` as `⊆`
# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
<( l::AbstractSet, r::AbstractSet) = l ⊊ r
<=(l::AbstractSet, r::AbstractSet) = l ⊆ r

## filtering sets

# Filter a set by handing `pred`, `push!`, the source set and a fresh
# `emptymutable(s)` container to `mapfilter`.
# NOTE(review): presumably `mapfilter` pushes the elements satisfying `pred`
# into that container and returns it — confirm against its definition.
filter(pred, s::AbstractSet) = mapfilter(pred, push!, s, emptymutable(s))

# it must be safe to delete the current element while iterating over s:
Expand Down
23 changes: 14 additions & 9 deletions test/sets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -592,11 +592,16 @@ end
end

@testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin
a = [1, 2]
b = [2, 1, 3]
for C = (Tuple, identity, Set, BitSet, Base.IdSet{Int})
A = C(a)
B = C(b)
a = [2, 1, 2]
b = [2, 3, 1, 3]
ua = unique(a)
ub = unique(b)
for TA in (Tuple, identity, Set, BitSet, Base.IdSet{Int}),
TB in (Tuple, identity, Set, BitSet, Base.IdSet{Int}),
uA = false:true,
uB = false:true
A = TA(uA ? ua : a)
B = TB(uB ? ub : b)
@test A ⊆ B
@test A ⊊ B
@test !(A ⊈ B)
Expand All @@ -611,6 +616,10 @@ end
@test !(B ⊉ A)
@test !issetequal(A, B)
@test !issetequal(B, A)
for T = (Tuple, identity, Set, BitSet, Base.IdSet{Int})
@test issetequal(A, T(A))
@test issetequal(B, T(B))
end
if A isa AbstractSet && B isa AbstractSet
@test A <= B
@test A < B
Expand All @@ -621,10 +630,6 @@ end
@test B >= A
@test B > A
end
for D = (Tuple, identity, Set, BitSet)
@test issetequal(A, D(A))
@test !issetequal(A, D(B))
end
end
end

Expand Down