From a237d23a55ffcfa73d4be2e04c501702c1e10053 Mon Sep 17 00:00:00 2001 From: lizz Date: Tue, 15 Oct 2019 03:37:55 +0800 Subject: [PATCH] Allow permutation of names in rename! (#1974) --- Project.toml | 1 + src/DataFrames.jl | 3 +- src/abstractdataframe/abstractdataframe.jl | 6 +- src/other/index.jl | 31 ++++++++++- test/dataframe.jl | 65 +++++++++++++++++++++- 5 files changed, 99 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index af169dd1bb..d05c4e9220 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" +Future = "9fa8497b-333b-5362-9e8d-4d0656e87820" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 5f7b6404c7..2009d7a8f0 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -21,7 +21,8 @@ using Tables, TableTraits, IteratorInterfaceExtensions import DataAPI.All, DataAPI.Between, DataAPI.describe, - Tables.columnindex + Tables.columnindex, + Future.copy! export AbstractDataFrame, All, diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index f684eaa703..4ec8db7fb9 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -150,21 +150,21 @@ rename(f::Function, df::AbstractDataFrame) * `::AbstractDataFrame` : the updated result -New names are processed sequentially. A new name must not already exist in the `DataFrame` -at the moment an attempt to rename a column is performed. +Each name is changed at most once. Permutation of names is allowed. 
**Examples** ```julia df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10)) rename(df, :i => :A, :x => :X) +rename(df, :x => :y, :y => :x) rename(df, [:i => :A, :x => :X]) rename(df, Dict(:i => :A, :x => :X)) rename(x -> Symbol(uppercase(string(x))), df) rename(df) do x Symbol(uppercase(string(x))) end -rename!(df, Dict(:i =>: A, :x => :X)) +rename!(df, Dict(:i => :A, :x => :X)) ``` """ diff --git a/src/other/index.jl b/src/other/index.jl index 5aea6d382e..5333c8cd52 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -46,14 +46,41 @@ function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false) end function rename!(x::Index, nms) + xbackup = copy(x) + processedfrom = Set{Symbol}() + processedto = Set{Symbol}() + toholder = Dict{Symbol,Int}() for (from, to) in nms + if from ∈ processedfrom + copy!(x.lookup, xbackup.lookup) + x.names .= xbackup.names + throw(ArgumentError("Tried renaming $from multiple times.")) + end + if to ∈ processedto + copy!(x.lookup, xbackup.lookup) + x.names .= xbackup.names + throw(ArgumentError("Tried renaming to $to multiple times.")) + end + push!(processedfrom, from) + push!(processedto, to) from == to && continue # No change, nothing to do + if !haskey(xbackup, from) + copy!(x.lookup, xbackup.lookup) + x.names .= xbackup.names + throw(ArgumentError("Tried renaming $from to $to, when $from does not exist in the Index.")) + end if haskey(x, to) - error("Tried renaming $from to $to, when $to already exists in the Index.") + toholder[to] = x.lookup[to] end - x.lookup[to] = col = pop!(x.lookup, from) + col = haskey(toholder, from) ? 
pop!(toholder, from) : pop!(x.lookup, from) + x.lookup[to] = col x.names[col] = to end + if !isempty(toholder) + copy!(x.lookup, xbackup.lookup) + x.names .= xbackup.names + throw(ArgumentError("Tried renaming to $(first(keys(toholder))), when it already exists in the Index.")) + end return x end diff --git a/test/dataframe.jl b/test/dataframe.jl index 5983d43f1e..60eceefdf1 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -1,10 +1,43 @@ module TestDataFrame using Dates, DataFrames, Statistics, Random, Test, Logging -using DataFrames: _columns +using DataFrames: _columns, index const ≅ = isequal const ≇ = !isequal +# randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974 +@testset "randomized tests for rename!" begin + n = Symbol.('a':'z') + Random.seed!(1234) + for k in 1:20 + sn = shuffle(n) + df = DataFrame(zeros(1,26), n) + p = Dict(Pair.(n, sn)) + cyclelength = Int[] + for x in n + i = 0 + y = x + while true + y = p[y] + i += 1 + x == y && break + end + push!(cyclelength, i) + end + i = lcm(cyclelength) + while true + rename!(df, p) + @test sort(names(df)) == n + @test sort(collect(keys(index(df).lookup))) == n + @test sort(collect(values(index(df).lookup))) == 1:26 + @test all(index(df).lookup[x] == i for (i,x) in enumerate(names(df))) + i -= 1 + names(df) == n && break + end + @test i == 0 + end +end + @testset "equality" begin @test DataFrame(a=[1, 2, 3], b=[4, 5, 6]) == DataFrame(a=[1, 2, 3], b=[4, 5, 6]) @test DataFrame(a=[1, 2], b=[4, 5]) != DataFrame(a=[1, 2, 3], b=[4, 5, 6]) @@ -1127,6 +1160,36 @@ end @test names(df) == [:A_4, :B_4] @test rename!(x->Symbol(lowercase(string(x))), df) === df @test names(df) == [:a_4, :b_4] + + df = DataFrame(A = 1:3, B = 'A':'C', C = [:x, :y, :z]) + @test rename!(df, :A => :B, :B => :A) === df + @test names(df) == [:B, :A, :C] + @test rename!(df, :A => :B, :B => :A, :C => :D) === df + @test names(df) == [:A, :B, :D] + @test rename!(df, :A => :B, :B => :C, :D => :A) === df + @test names(df) == 
[:B, :C, :A] + @test rename!(df, :A => :C, :B => :A, :C => :B) === df + @test names(df) == [:A, :B, :C] + @test rename!(df, :A => :A, :B => :B, :C => :C) === df + @test names(df) == [:A, :B, :C] + + cdf = copy(df) + @test_throws ArgumentError rename!(df, :X => :Y) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :X, :X => :Y) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :B) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :X, :A => :X) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :X, :A => :Y) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :X, :B => :X) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :C => :B) + @test df == cdf + @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :A => :X) + @test df == cdf end @testset "size" begin