Skip to content

Commit

Permalink
Allow permutation of names in rename! (#1974)
Browse files Browse the repository at this point in the history
  • Loading branch information
innerlee authored and nalimilan committed Oct 14, 2019
1 parent c3771d4 commit a237d23
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 7 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Expand Down
3 changes: 2 additions & 1 deletion src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ using Tables, TableTraits, IteratorInterfaceExtensions
import DataAPI.All,
DataAPI.Between,
DataAPI.describe,
Tables.columnindex
Tables.columnindex,
Future.copy!

export AbstractDataFrame,
All,
Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,21 @@ rename(f::Function, df::AbstractDataFrame)
* `::AbstractDataFrame` : the updated result
Renaming pairs are validated as a group. A new name may match an existing column name
in the `DataFrame` only if that column is itself renamed in the same call.
Each name is changed at most once. Permutation of names is allowed.
**Examples**
```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(df, :i => :A, :x => :X)
rename(df, :x => :y, :y => :x)
rename(df, [:i => :A, :x => :X])
rename(df, Dict(:i => :A, :x => :X))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df) do x
Symbol(uppercase(string(x)))
end
rename!(df, Dict(:i => :A, :x => :X))
```
"""
Expand Down
31 changes: 29 additions & 2 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,41 @@ function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
end

"""
    rename!(x::Index, nms)

Rename entries of `x` in place following `nms`, an iterable of `from => to`
pairs, and return `x`.

Each name may be used at most once as a source and at most once as a target.
A target may be a name currently held by `x` only when that name is itself
renamed by another pair of `nms`, so permutations of names are accepted.
Pairs with `from == to` are skipped.

If any pair is rejected, `x.lookup` and `x.names` are restored from a copy
taken on entry and an `ArgumentError` is thrown.
"""
function rename!(x::Index, nms)
    xbackup = copy(x)
    processedfrom = Set{Symbol}()
    processedto = Set{Symbol}()
    # Targets that collided with a name still present in `x`, mapped to the
    # column that owned the name. An entry is released once that name is
    # itself renamed; anything left at the end is a genuine clash.
    toholder = Dict{Symbol,Int}()

    # Undo every change made so far, then report the problem.
    function restore_and_throw(msg)
        copy!(x.lookup, xbackup.lookup)
        x.names .= xbackup.names
        throw(ArgumentError(msg))
    end

    for (from, to) in nms
        if from ∈ processedfrom
            restore_and_throw("Tried renaming $from multiple times.")
        end
        if to ∈ processedto
            restore_and_throw("Tried renaming to $to multiple times.")
        end
        push!(processedfrom, from)
        push!(processedto, to)
        from == to && continue # No change, nothing to do
        if !haskey(xbackup, from)
            restore_and_throw("Tried renaming $from to $to, when $from does not exist in the Index.")
        end
        if haskey(x, to)
            # Remember the current owner of `to` before it is overwritten below.
            toholder[to] = x.lookup[to]
        end
        # If `from` was overwritten by an earlier pair, its column lives in
        # `toholder`; otherwise it is still in the lookup table.
        col = haskey(toholder, from) ? pop!(toholder, from) : pop!(x.lookup, from)
        x.lookup[to] = col
        x.names[col] = to
    end
    if !isempty(toholder)
        restore_and_throw("Tried renaming to $(first(keys(toholder))), when it already exists in the Index.")
    end
    return x
end

Expand Down
65 changes: 64 additions & 1 deletion test/dataframe.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,43 @@
module TestDataFrame

using Dates, DataFrames, Statistics, Random, Test, Logging
using DataFrames: _columns
using DataFrames: _columns, index
# Infix shorthands: `a ≅ b` is `isequal(a, b)` and `a ≇ b` is `!isequal(a, b)`.
const ≅ = isequal
const ≇ = !isequal

# randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974
@testset "randomized tests for rename!" begin
    colnames = Symbol.('a':'z')
    Random.seed!(1234)
    for _ in 1:20
        shuffled = shuffle(colnames)
        df = DataFrame(zeros(1,26), colnames)
        perm = Dict(Pair.(colnames, shuffled))

        # For every name, count how many applications of `perm` bring it back
        # to itself, i.e. the length of the cycle it belongs to.
        cyclelengths = map(colnames) do start
            steps = 1
            current = perm[start]
            while current != start
                current = perm[current]
                steps += 1
            end
            steps
        end

        # Applying `perm` exactly lcm(cycle lengths) times must restore the
        # original column order, and not earlier.
        remaining = lcm(cyclelengths)
        while true
            rename!(df, perm)
            # The index must stay internally consistent after each renaming.
            @test sort(names(df)) == colnames
            @test sort(collect(keys(index(df).lookup))) == colnames
            @test sort(collect(values(index(df).lookup))) == 1:26
            @test all(index(df).lookup[nm] == pos for (pos, nm) in enumerate(names(df)))
            remaining -= 1
            if names(df) == colnames
                break
            end
        end
        @test remaining == 0
    end
end

@testset "equality" begin
@test DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6])
@test DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])
Expand Down Expand Up @@ -1127,6 +1160,36 @@ end
@test names(df) == [:A_4, :B_4]
@test rename!(x->Symbol(lowercase(string(x))), df) === df
@test names(df) == [:a_4, :b_4]

df = DataFrame(A = 1:3, B = 'A':'C', C = [:x, :y, :z])
@test rename!(df, :A => :B, :B => :A) === df
@test names(df) == [:B, :A, :C]
@test rename!(df, :A => :B, :B => :A, :C => :D) === df
@test names(df) == [:A, :B, :D]
@test rename!(df, :A => :B, :B => :C, :D => :A) === df
@test names(df) == [:B, :C, :A]
@test rename!(df, :A => :C, :B => :A, :C => :B) === df
@test names(df) == [:A, :B, :C]
@test rename!(df, :A => :A, :B => :B, :C => :C) === df
@test names(df) == [:A, :B, :C]

cdf = copy(df)
@test_throws ArgumentError rename!(df, :X => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :X => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :A => :X)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :A => :Y)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :X, :B => :X)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B, :B => :A, :C => :B)
@test df == cdf
@test_throws ArgumentError rename!(df, :A => :B, :B => :A, :A => :X)
@test df == cdf
end

@testset "size" begin
Expand Down

0 comments on commit a237d23

Please sign in to comment.