Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow permutation of names in rename! #1974

Merged
merged 8 commits into from
Oct 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand Down
3 changes: 2 additions & 1 deletion src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ using Tables, TableTraits, IteratorInterfaceExtensions
import DataAPI.All,
DataAPI.Between,
DataAPI.describe,
Tables.columnindex
Tables.columnindex,
Future.copy!

export AbstractDataFrame,
All,
Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,21 @@ rename(f::Function, df::AbstractDataFrame)

* `::AbstractDataFrame` : the updated result

New names are processed sequentially. A new name must not already exist in the `DataFrame`
at the moment an attempt to rename a column is performed.
Each name is changed at most once. Permutation of names is allowed.

**Examples**

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(df, :i => :A, :x => :X)
rename(df, :x => :y, :y => :x)
rename(df, [:i => :A, :x => :X])
rename(df, Dict(:i => :A, :x => :X))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df) do x
Symbol(uppercase(string(x)))
end
rename!(df, Dict(:i =>: A, :x => :X))
rename!(df, Dict(:i => :A, :x => :X))
```

"""
Expand Down
31 changes: 29 additions & 2 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,41 @@ function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
end

# Rename entries of the index `x` in place according to `nms`, an iterable of
# `from => to` pairs, and return `x`.
#
# Each name may be renamed at most once and each target may be used at most once.
# A target that currently exists in `x` is accepted as long as that name is itself
# renamed away by another pair, so permutations of names (e.g. `:a => :b, :b => :a`)
# are allowed.
#
# Throws `ArgumentError` if a source name is repeated, a target name is repeated,
# a source name does not exist, or a target name would still be occupied after all
# pairs are processed. Before throwing, `x.lookup` and `x.names` are restored from
# a copy taken on entry, so a failed call leaves no partial renames behind.
function rename!(x::Index, nms)
    xbackup = copy(x)

    # Undo every change made so far and report the failure.
    function restore_and_throw(msg)
        copy!(x.lookup, xbackup.lookup)
        x.names .= xbackup.names
        throw(ArgumentError(msg))
    end

    processedfrom = Set{Symbol}()
    processedto = Set{Symbol}()
    # Names that were overwritten as a target before being renamed themselves,
    # mapped to the column position they held at that moment.
    toholder = Dict{Symbol,Int}()
    for (from, to) in nms
        from ∈ processedfrom &&
            restore_and_throw("Tried renaming $from multiple times.")
        to ∈ processedto &&
            restore_and_throw("Tried renaming to $to multiple times.")
        push!(processedfrom, from)
        push!(processedto, to)
        from == to && continue # No change, nothing to do
        haskey(xbackup, from) ||
            restore_and_throw("Tried renaming $from to $to, when $from does not exist in the Index.")
        if haskey(x, to)
            # `to` is still in use: remember its column so that a later pair
            # renaming `to` can pick it up; otherwise it is reported after the loop.
            toholder[to] = x.lookup[to]
        end
        # If `from` was displaced by an earlier pair, its column lives in `toholder`
        # and the current `x.lookup[from]` entry belongs to another column.
        col = haskey(toholder, from) ? pop!(toholder, from) : pop!(x.lookup, from)
        x.lookup[to] = col
        x.names[col] = to
    end
    # Anything left here was overwritten but never renamed away: a real name clash.
    isempty(toholder) ||
        restore_and_throw("Tried renaming to $(first(keys(toholder))), when it already exists in the Index.")
    return x
end

Expand Down
65 changes: 64 additions & 1 deletion test/dataframe.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
module TestDataFrame

using Dates, DataFrames, Statistics, Random, Test, Logging
using DataFrames: _columns
using DataFrames: _columns, index
const ≅ = isequal
const ≇ = !isequal

# randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974
@testset "randomized tests for rename!" begin
    letters = Symbol.('a':'z')
    Random.seed!(1234)
    for trial in 1:20
        shuffled = shuffle(letters)
        df = DataFrame(zeros(1, 26), letters)
        # Random permutation of the column names, expressed as old => new
        perm = Dict(Pair.(letters, shuffled))

        # For every name, count how many applications of `perm` bring it back
        cyclelengths = Int[]
        for start in letters
            steps = 1
            current = perm[start]
            while current != start
                current = perm[current]
                steps += 1
            end
            push!(cyclelengths, steps)
        end

        # The names must return to the original order after exactly
        # lcm(cycle lengths) applications of the permutation
        remaining = lcm(cyclelengths)
        while true
            rename!(df, perm)
            idx = index(df)
            @test sort(names(df)) == letters
            @test sort(collect(keys(idx.lookup))) == letters
            @test sort(collect(values(idx.lookup))) == 1:26
            @test all(idx.lookup[nm] == pos for (pos, nm) in enumerate(names(df)))
            remaining -= 1
            names(df) == letters && break
        end
        @test remaining == 0
    end
end

@testset "equality" begin
@test DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6])
@test DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])
Expand Down Expand Up @@ -1127,6 +1160,36 @@ end
@test names(df) == [:A_4, :B_4]
@test rename!(x->Symbol(lowercase(string(x))), df) === df
@test names(df) == [:a_4, :b_4]

df = DataFrame(A = 1:3, B = 'A':'C', C = [:x, :y, :z])
@test rename!(df, :A => :B, :B => :A) === df
@test names(df) == [:B, :A, :C]
@test rename!(df, :A => :B, :B => :A, :C => :D) === df
@test names(df) == [:A, :B, :D]
@test rename!(df, :A => :B, :B => :C, :D => :A) === df
@test names(df) == [:B, :C, :A]
@test rename!(df, :A => :C, :B => :A, :C => :B) === df
@test names(df) == [:A, :B, :C]
@test rename!(df, :A => :A, :B => :B, :C => :C) === df
@test names(df) == [:A, :B, :C]

cdf = copy(df)
@test_throws ArgumentError rename!(df, :X => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :X => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :A => :X)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :A => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :B => :X)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B, :B => :A, :C => :B)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B, :B => :A, :A => :X)
@test df == cdf
end

@testset "size" begin
Expand Down