Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate names! #1986

Merged
merged 5 commits into from
Oct 17, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/src/lib/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ filter!
hcat
insertcols!
mapcols
names!
nonunique
nrow
ncol
Expand Down
1 change: 0 additions & 1 deletion src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ export AbstractDataFrame,
mapcols,
melt,
meltdf,
names!,
ncol,
nonunique,
nrow,
Expand Down
50 changes: 15 additions & 35 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ The following are normally implemented for AbstractDataFrames:
* `vcat` : vertical concatenation
* [`repeat`](@ref) : repeat rows
* `names` : columns names
* [`names!`](@ref) : set columns names
* [`rename!`](@ref) : rename columns names based on keyword arguments
* `length` : number of columns
* `size` : (nrows, ncols)
Expand Down Expand Up @@ -75,40 +74,8 @@ _names(df::AbstractDataFrame) = _names(index(df))

Compat.hasproperty(df::AbstractDataFrame, s::Symbol) = haskey(index(df), s)

"""
Set column names


```julia
names!(df::AbstractDataFrame, vals)
```

**Arguments**

* `df` : the AbstractDataFrame
* `vals` : column names, normally a Vector{Symbol} the same length as
the number of columns in `df`
* `makeunique` : if `false` (the default), an error will be raised
if duplicate names are found; if `true`, duplicate names will be suffixed
with `_i` (`i` starting at 1 for the first duplicate).

**Result**

* `::AbstractDataFrame` : the updated result


**Examples**

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
names!(df, [:a, :b, :c])
names!(df, [:a, :b, :a]) # throws ArgumentError
names!(df, [:a, :b, :a], makeunique=true) # renames second :a to :a_1
```

"""
function names!(df::AbstractDataFrame, vals; makeunique::Bool=false)
names!(index(df), vals, makeunique=makeunique)
function rename!(df::AbstractDataFrame, vals::Vector{Symbol}; makeunique::Bool=false)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
rename!(index(df), vals, makeunique=makeunique)
return df
end

Expand All @@ -121,17 +88,21 @@ function rename!(f::Function, df::AbstractDataFrame)
return df
end

rename(df::AbstractDataFrame, vals::Vector{Symbol}; makeunique::Bool=false) =
rename!(copy(df), vals, makeunique=makeunique)
rename(df::AbstractDataFrame, args...) = rename!(copy(df), args...)
rename(f::Function, df::AbstractDataFrame) = rename!(f, copy(df))

"""
Rename columns

```julia
rename!(df::AbstractDataFrame, vals::Vector{Symbol}; makeunique::Bool=false)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
rename!(df::AbstractDataFrame, (from => to)::Pair{Symbol, Symbol}...)
rename!(df::AbstractDataFrame, d::AbstractDict{Symbol,Symbol})
rename!(df::AbstractDataFrame, d::AbstractArray{Pair{Symbol,Symbol}})
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, vals::Vector{Symbol}; makeunique::Bool=false)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
rename(df::AbstractDataFrame, (from => to)::Pair{Symbol, Symbol}...)
rename(df::AbstractDataFrame, d::AbstractDict{Symbol,Symbol})
rename(df::AbstractDataFrame, d::AbstractArray{Pair{Symbol,Symbol}})
Expand All @@ -145,6 +116,11 @@ rename(f::Function, df::AbstractDataFrame)
the original names to new names
* `f` : a function which for each column takes the old name (a Symbol)
and returns the new name (a Symbol)
* `vals` : column names, normally a Vector{Symbol} the same length as
bkamins marked this conversation as resolved.
Show resolved Hide resolved
the number of columns in `df`
* `makeunique` : if `false` (the default), an error will be raised
if duplicate names are found; if `true`, duplicate names will be suffixed
with `_i` (`i` starting at 1 for the first duplicate).

**Result**

Expand All @@ -165,6 +141,10 @@ rename(df) do x
Symbol(uppercase(string(x)))
end
rename!(df, Dict(:i => :A, :x => :X))

rename!(df, [:a, :b, :c])
rename!(df, [:a, :b, :a]) # throws ArgumentError
rename(df, [:a, :b, :a], makeunique=true) # renames second :a to :a_1
```

"""
Expand Down
2 changes: 2 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1544,3 +1544,5 @@ import Base: setproperty!
@deprecate eltypes(df::AbstractDataFrame) eltype.(eachcol(df))

@deprecate permutecols!(df::DataFrame, p::AbstractVector) select!(df, p)
@deprecate names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false) rename!(x, nms, makeunique=makeunique)
@deprecate names!(df::AbstractDataFrame, vals::Vector{Symbol}; makeunique::Bool=false) rename!(df, vals, makeunique=makeunique)
4 changes: 3 additions & 1 deletion src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Base.isequal(x::AbstractIndex, y::AbstractIndex) = _names(x) == _names(y) # it i
Base.:(==)(x::AbstractIndex, y::AbstractIndex) = isequal(x, y)


function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
function rename!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
if !makeunique
if length(unique(nms)) != length(nms)
dup = unique(nms[nonunique(DataFrame(nms=nms))])
Expand Down Expand Up @@ -87,6 +87,8 @@ end
rename!(x::Index, nms::Pair{Symbol,Symbol}...) = rename!(x::Index, collect(nms))
rename!(f::Function, x::Index) = rename!(x, [(x=>f(x)) for x in x.names])

rename(x::Index, nms::Vector{Symbol}; makeunique::Bool=false) =
rename!(copy(x), nms, makeunique=makeunique)
rename(x::Index, args...) = rename!(copy(x), args...)
rename(f::Function, x::Index) = rename!(f, copy(x))

Expand Down
2 changes: 1 addition & 1 deletion test/broadcasting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ end
@test_throws DimensionMismatch dfv .+ rand(2,2)

df2 = copy(df)
names!(df2, [:x1, :x2, :x3, :x4, :y])
rename!(df2, [:x1, :x2, :x3, :x4, :y])
@test_throws ArgumentError df .+ df2
@test_throws ArgumentError df .+ 1 .+ df2
end
Expand Down
6 changes: 3 additions & 3 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ const ≅ = isequal
df2 = convert(DataFrame, Union{Float64, Missing}[0.0 1.0;
0.0 1.0;
0.0 1.0])
names!(df2, [:x1, :x2])
rename!(df2, [:x1, :x2])
@test df[!, :x1] == df2[!, :x1]
@test df[!, :x2] == df2[!, :x2]

df2 = DataFrame([0.0 1.0;
0.0 1.0;
0.0 1.0])
names!(df2, [:x1, :x2])
rename!(df2, [:x1, :x2])
@test df[!, :x1] == df2[!, :x1]
@test df[!, :x2] == df2[!, :x2]

Expand All @@ -92,7 +92,7 @@ const ≅ = isequal
df2 = DataFrame([0.0 1.0;
0.0 1.0;
0.0 1.0], [:a, :b])
names!(df2, [:a, :b])
rename!(df2, [:a, :b])
@test df[!, :x1] == df2[!, :a]
@test df[!, :x2] == df2[!, :b]

Expand Down
39 changes: 37 additions & 2 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,36 @@ const ≇ = !isequal
end
end

@testset "additional rename! tests" begin
Random.seed!(123)
for i in 1:1000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this brute-force approach? Can't you just choose a set of values that cover all possible cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It takes 0.1 second to run these 1000 tests. I add this because I was not sure if we cover all possible cases manually (the code for rename! in #1974 is tricky, so I prefer to be on a safe side).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, but are you sure that with 1000 iterations we cover all cases? :-)

Anyway, please add a comment explaining a bit why we do this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the docs fixes - a keen eye as always 😄.

I have added a note in the test. In general I am convinced that 1000 should cover all cases (I have chosen column count - 8 and names set so that with high probability all cases are covered). But I am not sure 😢, if I were sure I would not write a randomized test.

In general we want to be sure that cyclical renames like :X=>:Y, :Y=>:Z, :Z=>:X for various cycle lengths are processed correctly and also that non cyclical renames like :X=>:Y, :Z=>:X are processed correctly also when they happen in combination and in the presence of other columns that are not renamed and in two subcategories: a) the proposed renaming scheme is valid and b) the proposed renaming scheme is invalid (so as you see there are really many possible combinations of cases and it is really hard to make sure that you cover all of them if you do not generate the tests randomly; for sure the existing tests have not covered all the possibilities).

oldnames = Symbol.(rand('a':'z', 8))
while !allunique(oldnames)
oldnames .= Symbol.(rand('a':'z', 8))
end
newnames = [Symbol.(rand('a':'z', 4)); oldnames[5:end]]
df = DataFrame([[] for i in 1:8], oldnames)
if allunique(newnames)
@test names(rename(df, Pair.(oldnames[1:4], newnames[1:4])...)) == newnames
@test names(df) == oldnames
rename!(df, Pair.(oldnames[1:4], newnames[1:4])...)
@test names(df) == newnames
else
@test_throws ArgumentError rename(df, Pair.(oldnames[1:4], newnames[1:4])...)
@test names(df) == oldnames
@test_throws ArgumentError rename!(df, Pair.(oldnames[1:4], newnames[1:4])...)
@test names(df) == oldnames
end

newnames = [oldnames[1:2]; reverse(oldnames[3:6]); oldnames[7:end]]
df = DataFrame([[] for i in 1:8], oldnames)
@test names(rename(df, Pair.(oldnames[3:6], newnames[3:6])...)) == newnames
@test names(df) == oldnames
rename!(df, Pair.(oldnames[3:6], newnames[3:6])...)
@test names(df) == newnames
end
end

@testset "equality" begin
@test DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6])
@test DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])
Expand All @@ -58,7 +88,12 @@ end

df[1, :a] = 4
df[1, :b][!, :e] .= 5
names!(df, [:f, :g])

@test names(rename(df, [:f, :g])) == [:f, :g]
@test names(rename(df, [:f, :f], makeunique=true)) == [:f, :f_1]
@test names(df) == [:a, :b]

rename!(df, [:f, :g])

@test names(dfc) == [:a, :b]
@test names(dfdc) == [:a, :b]
Expand Down Expand Up @@ -1043,7 +1078,7 @@ end
@test df == dfc
@test occursin("Error adding value to column a", String(take!(buf)))

names!(df, [:a, :b, :z])
rename!(df, [:a, :b, :z])
@test_throws ArgumentError append!(df, dfc)
end

Expand Down
15 changes: 10 additions & 5 deletions test/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,14 @@ end
@test i[Not(1:0)] == 1:length(i)

@test names(i) == [:A,:B]
@test names!(i, [:a,:a], makeunique=true) == Index([:a,:a_1])
@test_throws ArgumentError names!(i, [:a,:a])
@test names!(i, [:a,:b]) == Index([:a,:b])
@test rename(i, [:a,:a], makeunique=true) == Index([:a,:a_1])
@test names(i) == [:A,:B]
@test rename!(i, [:a,:a], makeunique=true) == Index([:a,:a_1])
@test_throws ArgumentError rename(i, [:a,:a])
@test_throws ArgumentError rename!(i, [:a,:a])
@test rename(i, [:a,:b]) == Index([:a,:b])
@test names(i) == [:a,:a_1]
@test rename!(i, [:a,:b]) == Index([:a,:b])
@test rename(i, Dict(:a=>:A, :b=>:B)) == Index([:A,:B])
@test rename(i, :a => :A) == Index([:A,:b])
@test rename(i, :a => :a) == Index([:a,:b])
Expand All @@ -89,8 +94,8 @@ push!(i, :C)

i = Index([:A, :B, :C, :D, :E])
i2 = copy(i)
names!(i2, reverse(names(i2)))
names!(i2, reverse(names(i2)))
rename!(i2, reverse(names(i2)))
rename!(i2, reverse(names(i2)))
@test names(i2) == names(i)
for name in names(i)
i2[name] # Issue #715
Expand Down
2 changes: 1 addition & 1 deletion test/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ end
sdf = view(df, [3,1,4], [3,1,4])
erd = eachrow(df)
erv = eachrow(sdf)
names!(df, Symbol.(string.("y", 1:4)))
rename!(df, Symbol.(string.("y", 1:4)))
df[!, 1] = 51:56
@test df[1, :] == erd[1]
@test copy(erv[1]) == (y3=33, y1=53, y4=43)
Expand Down
6 changes: 3 additions & 3 deletions test/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ const ≅ = isequal
c = randn(12),
d = randn(12))
w2 = wide[:, [1, 2, 4, 5]]
names!(w2, [:id, :a, :_C_, :_D_])
rename!(w2, [:id, :a, :_C_, :_D_])
long = stack(wide)
wide3 = unstack(long, [:id, :a], :variable, :value)
@test wide3 == wide[:, [1, 2, 4, 5]]
Expand Down Expand Up @@ -329,9 +329,9 @@ end
c = randn(12),
d = randn(12))
w1 = wide[:, [1, 2, 4, 5]]
names!(w1, [:id, :a, :D, :d])
rename!(w1, [:id, :a, :D, :d])
w2 = wide[:, [1, 2, 4, 5]]
names!(w2, [:id, :a, :_D_, :_d_])
rename!(w2, [:id, :a, :_D_, :_d_])
long = stack(wide)
long.variable = replace.(string.(long.variable), Ref("c" => "D"))
wide3 = unstack(long, [:id, :a], :variable, :value)
Expand Down