Allow permutation of names in rename! (#1974)

JuliaData · Oct 14, 2019 · a237d23 · a237d23
1 parent c3771d4
commit a237d23
Show file tree

Hide file tree

Showing 5 changed files with 99 additions and 7 deletions.
diff --git a/Project.toml b/Project.toml
@@ -8,6 +8,7 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
 DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
 IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
+Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
 Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"

diff --git a/src/DataFrames.jl b/src/DataFrames.jl
@@ -21,7 +21,8 @@ using Tables, TableTraits, IteratorInterfaceExtensions
 import DataAPI.All,
        DataAPI.Between,
        DataAPI.describe,
-       Tables.columnindex
+       Tables.columnindex,
+       Future.copy!
 
 export AbstractDataFrame,
        All,

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -150,21 +150,21 @@ rename(f::Function, df::AbstractDataFrame)
 
 * `::AbstractDataFrame` : the updated result
 
-New names are processed sequentially. A new name must not already exist in the `DataFrame`
-at the moment an attempt to rename a column is performed.
+Each name is changed at most once. Permutation of names is allowed.
 
 **Examples**
 
 ```julia
 df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
 rename(df, :i => :A, :x => :X)
+rename(df, :x => :y, :y => :x)
 rename(df, [:i => :A, :x => :X])
 rename(df, Dict(:i => :A, :x => :X))
 rename(x -> Symbol(uppercase(string(x))), df)
 rename(df) do x
     Symbol(uppercase(string(x)))
 end
-rename!(df, Dict(:i =>: A, :x => :X))
+rename!(df, Dict(:i => :A, :x => :X))
 ```
 
 """

diff --git a/src/other/index.jl b/src/other/index.jl
@@ -46,14 +46,41 @@ function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
 end
 
 function rename!(x::Index, nms)
+    xbackup = copy(x)
+    processedfrom = Set{Symbol}()
+    processedto = Set{Symbol}()
+    toholder = Dict{Symbol,Int}()
     for (from, to) in nms
+        if from ∈ processedfrom
+            copy!(x.lookup, xbackup.lookup)
+            x.names .= xbackup.names
+            throw(ArgumentError("Tried renaming $from multiple times."))
+        end
+        if to ∈ processedto
+            copy!(x.lookup, xbackup.lookup)
+            x.names .= xbackup.names
+            throw(ArgumentError("Tried renaming to $to multiple times."))
+        end
+        push!(processedfrom, from)
+        push!(processedto, to)
         from == to && continue # No change, nothing to do
+        if !haskey(xbackup, from)
+            copy!(x.lookup, xbackup.lookup)
+            x.names .= xbackup.names
+            throw(ArgumentError("Tried renaming $from to $to, when $from does not exist in the Index."))
+        end
         if haskey(x, to)
-            error("Tried renaming $from to $to, when $to already exists in the Index.")
+            toholder[to] = x.lookup[to]
         end
-        x.lookup[to] = col = pop!(x.lookup, from)
+        col = haskey(toholder, from) ? pop!(toholder, from) : pop!(x.lookup, from)
+        x.lookup[to] = col
         x.names[col] = to
     end
+    if !isempty(toholder)
+        copy!(x.lookup, xbackup.lookup)
+        x.names .= xbackup.names
+        throw(ArgumentError("Tried renaming to $(first(keys(toholder))), when it already exists in the Index."))
+    end
     return x
 end
 

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -1,10 +1,43 @@
 module TestDataFrame
 
 using Dates, DataFrames, Statistics, Random, Test, Logging
-using DataFrames: _columns
+using DataFrames: _columns, index
 const ≅ = isequal
 const ≇ = !isequal
 
+# randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974
+@testset "randomized tests for rename!" begin
+    n = Symbol.('a':'z')
+    Random.seed!(1234)
+    for k in 1:20
+        sn = shuffle(n)
+        df = DataFrame(zeros(1,26), n)
+        p = Dict(Pair.(n, sn))
+        cyclelength = Int[]
+        for x in n
+            i = 0
+            y = x
+            while true
+                y = p[y]
+                i += 1
+                x == y && break
+            end
+            push!(cyclelength, i)
+        end
+        i = lcm(cyclelength)
+        while true
+            rename!(df, p)
+            @test sort(names(df)) == n
+            @test sort(collect(keys(index(df).lookup))) == n
+            @test sort(collect(values(index(df).lookup))) == 1:26
+            @test all(index(df).lookup[x] == i for (i,x) in enumerate(names(df)))
+            i -= 1
+            names(df) == n && break
+        end
+        @test i == 0
+    end
+end
+
 @testset "equality" begin
     @test DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6])
     @test DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6])
@@ -1127,6 +1160,36 @@ end
     @test names(df) == [:A_4, :B_4]
     @test rename!(x->Symbol(lowercase(string(x))), df) === df
     @test names(df) == [:a_4, :b_4]
+
+    df = DataFrame(A = 1:3, B = 'A':'C', C = [:x, :y, :z])
+    @test rename!(df, :A => :B, :B => :A) === df
+    @test names(df) == [:B, :A, :C]
+    @test rename!(df, :A => :B, :B => :A, :C => :D) === df
+    @test names(df) == [:A, :B, :D]
+    @test rename!(df, :A => :B, :B => :C, :D => :A) === df
+    @test names(df) == [:B, :C, :A]
+    @test rename!(df, :A => :C, :B => :A, :C => :B) === df
+    @test names(df) == [:A, :B, :C]
+    @test rename!(df, :A => :A, :B => :B, :C => :C) === df
+    @test names(df) == [:A, :B, :C]
+
+    cdf = copy(df)
+    @test_throws ArgumentError rename!(df, :X => :Y)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :X, :X => :Y)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :B)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :X, :A => :X)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :X, :A => :Y)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :X, :B => :X)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :C => :B)
+    @test df == cdf
+    @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :A => :X)
+    @test df == cdf
 end
 
 @testset "size" begin