From 34a8a88e3bb6cf9f096540caaaceac8591a8df43 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Tue, 14 Nov 2017 01:14:19 +0800 Subject: [PATCH 1/4] Make counters use inc!, dec!, reset! and merge!, also adds setindex! (#334) * make counters use inc! dec! and reset! * update tests fully * bring back eltype forcing and make it work in 0.7 as well --- src/DataStructures.jl | 6 +-- src/accumulator.jl | 100 ++++++++++++++++++++++++++++++--------- test/test_accumulator.jl | 55 +++++++++++++++++---- 3 files changed, 126 insertions(+), 35 deletions(-) diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 4e475b05a..a17ebc5ca 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -2,7 +2,7 @@ __precompile__() module DataStructures - import Base: <, <=, ==, length, isempty, start, next, done, + import Base: <, <=, ==, length, isempty, start, next, done, delete!, show, dump, empty!, getindex, setindex!, get, get!, in, haskey, keys, merge, copy, cat, push!, pop!, shift!, unshift!, insert!, @@ -20,12 +20,12 @@ module DataStructures export deque, enqueue!, dequeue!, dequeue_pair!, update!, reverse_iter export capacity, num_blocks, front, back, top, top_with_handle, sizehint! - export Accumulator, counter + export Accumulator, counter, reset!, inc!, dec! + export ClassifiedCollections export classified_lists, classified_sets, classified_counters export IntDisjointSets, DisjointSets, num_groups, find_root, in_same_set, root_union! - export push! export AbstractHeap, compare, extract_all! 
export BinaryHeap, binary_minheap, binary_maxheap, nlargest, nsmallest diff --git a/src/accumulator.jl b/src/accumulator.jl index 6d0ad79ab..45da7d2b6 100644 --- a/src/accumulator.jl +++ b/src/accumulator.jl @@ -9,30 +9,33 @@ end Accumulator(::Type{T}, ::Type{V}) where {T,V<:Number} = Accumulator{T,V}(Dict{T,V}()) counter(T::Type) = Accumulator(T,Int) -counter(dct::Dict{T,Int}) where {T} = Accumulator{T,Int}(copy(dct)) +counter(dct::Dict{T,V}) where {T,V<:Integer} = Accumulator{T,V}(copy(dct)) """ - counter{T}(seq::AbstractArray) + counter(seq) Returns an `Accumulator` object containing the elements from `seq`. """ -function counter(seq::AbstractArray{T}) where T - ct = counter(T) +function counter(seq) + ct = counter(eltype_for_accumulator(seq)) for x in seq - push!(ct, x) + inc!(ct, x) end return ct end -function counter(gen::T) where {T<:Base.Generator} - ct = counter(Base._default_eltype(T)) - for x in gen - push!(ct, x) +eltype_for_accumulator(seq::T) where T = eltype(T) +function eltype_for_accumulator(seq::T) where {T<:Base.Generator} + @static if VERSION < v"0.7.0-DEV.2104" + Base._default_eltype(T) + else + Base.@default_eltype(T) end - return ct end -copy(ct::Accumulator{T,V}) where {T,V<:Number} = Accumulator{T,V}(copy(ct.map)) + + +copy(ct::Accumulator) = Accumulator(copy(ct.map)) length(a::Accumulator) = length(a.map) @@ -44,6 +47,9 @@ get(ct::Accumulator, x, default) = get(ct.map, x, default) getindex(ct::Accumulator{T,V}, x) where {T,V} = get(ct.map, x, zero(V)) +setindex!(ct::Accumulator, x, v) = setindex!(ct.map, x, v) + + haskey(ct::Accumulator, x) = haskey(ct.map, x) keys(ct::Accumulator) = keys(ct.map) @@ -61,32 +67,80 @@ done(ct::Accumulator, state) = done(ct.map, state) # manipulation -push!(ct::Accumulator, x, a::Number) = (ct.map[x] = ct[x] + a) -push!(ct::Accumulator{T,V}, x) where {T,V} = push!(ct, x, one(V)) +""" + inc!(ct, x, [v=1]) + +Increments the count for `x` by `v` (defaulting to one) +""" +inc!(ct::Accumulator, x, a::Number) = 
(ct[x] += a) +inc!(ct::Accumulator{T,V}, x) where {T,V} = inc!(ct, x, one(V)) + +# inc! is preferred over push!, but we need to provide push! for the Bag interpretation +# which is used by classified_collections.jl +push!(ct::Accumulator, x) = inc!(ct, x) +push!(ct::Accumulator, x, a::Number) = inc!(ct, x, a) # To remove ambiguities related to Accumulator now being a subtype of Associative -push!(ct::Accumulator{T,V}, x::T) where T<:Pair where V = push!(ct, x, one(V)) -push!(ct::Accumulator{T,V}, x::Pair) where {T,V} = push!(ct, convert(T, x)) +push!(ct::Accumulator, x::Pair) = inc!(ct, x) + + + +""" + dec!(ct, x, [v=1]) + +Decrements the count for `x` by `v` (defaulting to one) +""" +dec!(ct::Accumulator, x, a::Number) = (ct[x] -= a) +dec!(ct::Accumulator{T,V}, x) where {T,V} = dec!(ct, x, one(V)) + +#TODO: once we are done deprecating `pop!` for `reset!` then add `pop!` as an alias for `dec!` + +""" + merge!(ct1, others...) -function push!(ct::Accumulator, r::Accumulator) - for (x, v) in r - push!(ct, x, v) +Merges the other counters into `ct1`, +summing the counts for all elements. +""" +function merge!(ct::Accumulator, other::Accumulator) + for (x, v) in other + inc!(ct, x, v) end ct end -pop!(ct::Accumulator, x) = pop!(ct.map, x) function merge!(ct1::Accumulator, others::Accumulator...) for ct in others - push!(ct1,ct) + merge!(ct1,ct) end return ct1 end -merge(ct1::Accumulator) = ct1 -function merge(ct1::Accumulator{T,V}, others::Accumulator{T,V}...) where {T,V<:Number} + +""" + merge(counters...) + +Creates a new counter with total counts equal to the sum of the counts in the counters given as arguments. + +See also merge! +""" +function merge(ct1::Accumulator, others::Accumulator...) ct = copy(ct1) merge!(ct,others...) - return ct end + +""" + reset!(ct::Accumulator, x) + +Resets the count of `x` to zero. +Returns its former count. 
+""" +reset!(ct::Accumulator, x) = pop!(ct.map, x) + + + +## Deprecations +@deprecate pop!(ct::Accumulator, x) reset!(ct, x) +@deprecate push!(ct1::Accumulator, ct2::Accumulator) merge!(ct1,ct2) + + diff --git a/test/test_accumulator.jl b/test/test_accumulator.jl index add6ad88e..2807248da 100644 --- a/test/test_accumulator.jl +++ b/test/test_accumulator.jl @@ -9,16 +9,31 @@ @test !haskey(ct, "abc") @test isempty(collect(keys(ct))) - push!(ct, "a") + # Test setindex! + ct["b"] = 2 + @test ct["b"] == 2 + ct["b"] = 0 + @test ct["b"] == 0 + + + + inc!(ct, "a") @test haskey(ct, "a") @test ct["a"] == 1 - push!(ct, "b", 2) + inc!(ct, "b", 2) @test haskey(ct, "b") @test ct["b"] == 2 + # Test dec! + dec!(ct, "b") + @test ct["b"] == 1 + dec!(ct, "b", 16) + @test ct["b"] == -15 + ct["b"] = 2 + # Test convert - push!(ct, "b", 0x3) + inc!(ct, "b", 0x3) @test ct["b"] == 5 @test !haskey(ct, "abc") @@ -39,7 +54,7 @@ @test ct2["b"] == 2 @test ct2["c"] == 2 - push!(ct, ct2) + merge!(ct, ct2) @test ct["a"] == 4 @test ct["b"] == 7 @test ct["c"] == 2 @@ -60,7 +75,7 @@ @test ctm["b"] == 22 @test ctm["c"] == 2 - @test pop!(ctm, "b") == 22 + @test reset!(ctm, "b") == 22 @test !haskey(ctm, "b") @test ctm["b"] == 0 @@ -70,7 +85,7 @@ @test push!(ct4, 1=>2) == 2 ct5 = counter(Dict([("a",10), ("b",20)])) - @test merge(ct5)===ct5 + @test merge(ct5)==ct5 @test merge!(ct5)===ct5 @test merge(ct5,ct5,ct5)==counter(Dict([("a",30), ("b",60)])) @@ -85,13 +100,13 @@ ct6 = counter(["a", "b" , "b", "c", "c", "c"]) for ii in split("a b c") - push!(ct6, ii) + inc!(ct6, ii) end @test ct6["a"] == 2 @test ct6["b"] == 3 @test ct6["c"] == 4 for ii in split("a b") - pop!(ct6, ii) + reset!(ct6, ii) end @test ct6["a"] == 0 @test ct6["b"] == 0 @@ -99,10 +114,32 @@ s = ["y", "el", "sol", "se", "fue"] @test counter(length(x) for x in s) == counter(map(length, s)) - + + + # non-integer uses + acc = Accumulator(Symbol, Float16) + acc[:a] = 1.5 + @test acc[:a] ≈ 1.5 + push!(acc, :a, 2.5) + @test acc[:a] ≈ 4.0 + 
dec!(acc, :a) + @test acc[:a] ≈ 3.0 + # ambiguity resolution ct7 = counter(Int) @test_throws MethodError push!(ct7, 1=>2) + + #deprecations + ctd = counter([1,2,3]) + @test ctd[3]==1 + + println("\nThe following warning is expected:") + @test pop!(ctd, 3)==1 + println("\nThe following warning is expected:") + @test push!(counter([1,2,3]),counter([1,2,3])) == merge!(counter([1,2,3]), counter([1,2,3])) + end # @testset Accumulators + + From cb224268357105d50952613ad40ce75382bc5144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1ll=20Haraldsson?= Date: Fri, 17 Nov 2017 01:11:23 +0000 Subject: [PATCH 2/4] Doc: "unrolled linked list" change [ci skip] (#342) --- doc/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/index.rst b/doc/source/index.rst index c4f0db3b3..ac42afb1e 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -3,7 +3,7 @@ DataStructures.jl This package implements a variety of data structures, including -* Deque (based on block-list) +* Deque (implemented with an `unrolled linked list <https://en.wikipedia.org/wiki/Unrolled_linked_list>`_) * CircularBuffer * CircularDeque (based on a circular buffer) * Stack From 4603f9881b4a8ae43c8d4b86fcf373466779efc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1ll=20Haraldsson?= Date: Fri, 17 Nov 2017 01:12:15 +0000 Subject: [PATCH 3/4] Make "unrolled link list" clear, and link to Wikipedia [ci skip] (#341) * Update README.rst * Fixed for rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8bcf576b5..900f3b295 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ DataStructures.jl This package implements a variety of data structures, including -* Deque (based on block-list) +* Deque (implemented with an `unrolled linked list <https://en.wikipedia.org/wiki/Unrolled_linked_list>`_) * CircularBuffer * CircularDeque * Stack From 71e6001be48e311307596887cc687f7b329e7101 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Fri, 17 Nov 2017 09:13:52 +0800 Subject: [PATCH 4/4] add get!(func, ::SortedDict, 
key) (#336) * =add get!(func, ::SortedDict, key) * Add get() do * Add tests for get() do * fix missing brace * fix typos (committing via git webinterface is a trap) * =Fix typos in tests --- src/sorted_dict.jl | 26 ++++++++++++++------------ test/test_sorted_containers.jl | 21 +++++++++++++++++++++ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/sorted_dict.jl b/src/sorted_dict.jl index 71e660390..18244d0e8 100644 --- a/src/sorted_dict.jl +++ b/src/sorted_dict.jl @@ -184,37 +184,39 @@ end end - - @inline orderobject(m::SortedDict) = m.bt.ord @inline function haskey(m::SortedDict, k_) - i, exactfound = findkey(m.bt,convert(keytype(m),k_)) + i, exactfound = findkey(m.bt, convert(keytype(m), k_)) exactfound end -@inline function get(m::SortedDict{K,D,Ord}, k_, default_) where {K,D,Ord <: Ordering} - i, exactfound = findkey(m.bt, convert(K,k_)) - return exactfound ? m.bt.data[i].d : convert(D,default_) +function get(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where {K,D} + i, exactfound = findkey(m.bt, convert(K, k_)) + return exactfound ? m.bt.data[i].d : convert(D, default_()) end +get(m::SortedDict, k_, default_) = get(()->default_, m, k_) + -function get!(m::SortedDict{K,D,Ord}, k_, default_) where {K,D,Ord <: Ordering} +function get!(default_::Union{Function,Type}, m::SortedDict{K,D}, k_) where {K,D} k = convert(K,k_) i, exactfound = findkey(m.bt, k) if exactfound return m.bt.data[i].d else - default = convert(D,default_) + default = convert(D, default_()) insert!(m.bt,k, default, false) return default end end +get!(m::SortedDict, k_, default_) = get!(()->default_, m, k_) + function getkey(m::SortedDict{K,D,Ord}, k_, default_) where {K,D,Ord <: Ordering} - i, exactfound = findkey(m.bt, convert(K,k_)) + i, exactfound = findkey(m.bt, convert(K, k_)) exactfound ? 
m.bt.data[i].k : convert(K, default_) end @@ -222,14 +224,14 @@ end ## key @inline function delete!(m::SortedDict, k_) - i, exactfound = findkey(m.bt,convert(keytype(m),k_)) + i, exactfound = findkey(m.bt, convert(keytype(m), k_)) !exactfound && throw(KeyError(k_)) delete!(m.bt, i) m end @inline function pop!(m::SortedDict, k_) - i, exactfound = findkey(m.bt,convert(keytype(m),k_)) + i, exactfound = findkey(m.bt, convert(keytype(m), k_)) !exactfound && throw(KeyError(k_)) d = m.bt.data[i].d delete!(m.bt, i) @@ -274,7 +276,7 @@ function mergetwo!(m::SortedDict{K,D,Ord}, end function packcopy(m::SortedDict{K,D,Ord}) where {K,D,Ord <: Ordering} - w = SortedDict(Dict{K,D}(),orderobject(m)) + w = SortedDict(Dict{K,D}(), orderobject(m)) mergetwo!(w,m) w end diff --git a/test/test_sorted_containers.jl b/test/test_sorted_containers.jl index 6099fe76c..ee22dfc5f 100644 --- a/test/test_sorted_containers.jl +++ b/test/test_sorted_containers.jl @@ -569,9 +569,30 @@ end # issue #216 @test DataStructures.isordered(SortedDict{Int, String}) + + + # check for get! and get + dfc = SortedDict{Int, Vector{Int}}() + x1 = get!(dfc,1,[1]) + @test x1 == [1] + @test x1 === dfc[1] + @test x1 === get!(dfc, 1, [1000]) + @test x1 === get(dfc, 1, [1000]) + + x2 = get!(()->[2], dfc, 2) + @test x2 == [2] + @test x2 === dfc[2] + @test x2 === get!(()->[1000], dfc, 2) + @test x2 === get(()->[1000], dfc, 2) + + @test [42] == get(()->[42], dfc, 3) + @test !haskey(dfc, 3) + @test [43] == get(dfc, 4, [43]) + @test !haskey(dfc, 4) end + function bitreverse(i) zeroi = zero(i) onei = one(i)