From 38027c7ff90288c802fe64751e7ec5412e1ea973 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 5 Sep 2020 10:55:01 +0200
Subject: [PATCH] add renamecols to select/transform/combine (#2397)

---
 NEWS.md                                   |   4 +
 src/abstractdataframe/selection.jl        | 181 +++++++++++++---------
 src/deprecated.jl                         |  38 ++---
 src/groupeddataframe/splitapplycombine.jl | 148 +++++++++---------
 test/grouping.jl                          |  29 +++-
 test/select.jl                            |  21 +++
 6 files changed, 257 insertions(+), 164 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index c0f0a3ce74..2c650aae2a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -55,6 +55,10 @@
   ([#2373](https://github.com/JuliaData/DataFrames.jl/pull/2373))
 * add `columnindex` for `DataFrameRow`
   ([#2380](https://github.com/JuliaData/DataFrames.jl/pull/2380))
+* `select`, `select!`, `transform`, `transform!` and `combine` now accept a
+  `renamecols` keyword argument that makes it possible to avoid adding the
+  transformation function name as a suffix in automatically generated column names
+  ([#2397](https://github.com/JuliaData/DataFrames.jl/pull/2397))
 
 ## Deprecated
 
diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl
index ad3043be84..0469b87fb3 100644
--- a/src/abstractdataframe/selection.jl
+++ b/src/abstractdataframe/selection.jl
@@ -34,7 +34,7 @@ _by_row_helper(x::Union{NamedTuple, DataFrameRow}) =
 # add a method to funname defined in other/utils.jl
 funname(row::ByRow) = funname(row.fun)
 
-normalize_selection(idx::AbstractIndex, sel) =
+normalize_selection(idx::AbstractIndex, sel, renamecols::Bool) =
     try
         idx[sel]
     catch e
@@ -45,28 +45,33 @@ normalize_selection(idx::AbstractIndex, sel) =
         end
     end
 
-normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), Symbol}) =
+normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), Symbol},
+                    renamecols::Bool) =
     length(idx) == 0 ? (Int[] => (() -> 0) => last(sel)) : (1 => length => last(sel))
-normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), <:AbstractString}) =
-    normalize_selection(idx, first(sel) => Symbol(last(sel)))
-normalize_selection(idx::AbstractIndex, sel::typeof(nrow)) =
-    normalize_selection(idx, nrow => :nrow)
+normalize_selection(idx::AbstractIndex, sel::Pair{typeof(nrow), <:AbstractString},
+                    renamecols::Bool) =
+    normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols)
+normalize_selection(idx::AbstractIndex, sel::typeof(nrow), renamecols::Bool) =
+    normalize_selection(idx, nrow => :nrow, renamecols)
 
-function normalize_selection(idx::AbstractIndex, sel::ColumnIndex)
+function normalize_selection(idx::AbstractIndex, sel::ColumnIndex, renamecols::Bool)
     c = idx[sel]
     return c => identity => _names(idx)[c]
 end
 
-function normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, Symbol})
+function normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, Symbol},
+                             renamecols::Bool)
     c = idx[first(sel)]
     return c => identity => last(sel)
 end
 
-normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractString}) =
-    normalize_selection(idx, first(sel) => Symbol(last(sel)))
+normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractString},
+                    renamecols::Bool) =
+    normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool)
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:Any,<:Pair{<:Base.Callable, Symbol}})
+                             sel::Pair{<:Any,<:Pair{<:Base.Callable, Symbol}},
+                             renamecols::Bool)
     if first(sel) isa AsTable
         rawc = first(sel).cols
         wanttable = true
@@ -97,19 +102,25 @@ function normalize_selection(idx::AbstractIndex,
 end
 
 normalize_selection(idx::AbstractIndex,
-                    sel::Pair{<:Any,<:Pair{<:Base.Callable,<:AbstractString}}) =
-    normalize_selection(idx, first(sel) => first(last(sel)) => Symbol(last(last(sel))))
+                    sel::Pair{<:Any,<:Pair{<:Base.Callable,<:AbstractString}},
+                    renamecols::Bool) =
+    normalize_selection(idx, first(sel) => first(last(sel)) => Symbol(last(last(sel))),
+                        renamecols::Bool)
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:ColumnIndex,<:Base.Callable})
+                             sel::Pair{<:ColumnIndex,<:Base.Callable}, renamecols::Bool)
     c = idx[first(sel)]
     fun = last(sel)
-    newcol = Symbol(_names(idx)[c], "_", funname(fun))
+    if renamecols
+        newcol = Symbol(_names(idx)[c], "_", funname(fun))
+    else
+        newcol = _names(idx)[c]
+    end
     return c => fun => newcol
 end
 
 function normalize_selection(idx::AbstractIndex,
-                             sel::Pair{<:Any, <:Base.Callable})
+                             sel::Pair{<:Any, <:Base.Callable}, renamecols::Bool)
     if first(sel) isa AsTable
         rawc = first(sel).cols
         wanttable = true
@@ -138,11 +149,23 @@ function normalize_selection(idx::AbstractIndex,
     end
     fun = last(sel)
     if length(c) > 3
-        newcol = Symbol(join(@views(_names(idx)[c[1:2]]), '_'), "_etc_", funname(fun))
+        prefix = join(@views(_names(idx)[c[1:2]]), '_')
+        if renamecols
+            newcol = Symbol(prefix, "_etc_", funname(fun))
+        else
+            newcol = Symbol(prefix, "_etc")
+        end
     elseif isempty(c)
+        renamecols || throw(ArgumentError("when renamecols=false target column name " *
+                                          "must be passed if there are no input columns"))
         newcol = Symbol(funname(fun))
     else
-        newcol = Symbol(join(view(_names(idx), c), '_'), '_', funname(fun))
+        prefix = join(view(_names(idx), c), '_')
+        if renamecols
+            newcol = Symbol(prefix, '_', funname(fun))
+        else
+            newcol = Symbol(prefix)
+        end
     end
     return (wanttable ? AsTable(c) : c) => fun => newcol
 end
@@ -251,10 +274,14 @@ SELECT_ARG_RULES =
 
     Column transformation can also be specified using the short `old_column =>
     fun` form. In this case, `new_column_name` is automatically generated as
-    `\$(old_column)_\$(fun)`. Up to three column names are used for multiple
-    input columns and they are joined using `_`; if more than three columns are
-    passed then the name consists of the first two names and `etc` suffix then,
-    e.g. `[:a,:b,:c,:d] => fun` produces the new column name `:a_b_etc_fun`.
+    `\$(old_column)_\$(fun)` if `renamecols=true` and `\$(old_column)` if
+    `renamecols=false`. Up to three column names are used for multiple input
+    columns and they are joined using `_`; if more than three columns are passed
+    then the name consists of the first two names and an `etc` suffix, e.g.
+    `[:a,:b,:c,:d] => fun` produces the new column name `:a_b_etc_fun` if
+    `renamecols=true` and `:a_b_etc` if `renamecols=false`.
+    It is not allowed to pass `renamecols=false` if `old_column` is empty
+    as it would generate an empty column name.
 
     Column renaming and transformation operations can be passed wrapped in
     vectors (this is useful when combined with broadcasting).
@@ -275,7 +302,7 @@ SELECT_ARG_RULES =
     """
 
 """
-    select!(df::DataFrame, args...)
+    select!(df::DataFrame, args...; renamecols::Bool=true)
 
 Mutate `df` in place to retain only columns specified by `args...` and return it.
 The result is guaranteed to have the same number of rows as `df`, except when no
@@ -345,22 +372,22 @@ julia> df = DataFrame(a=1:3, b=4:6);
 
 julia> using Statistics
 
-julia> select!(df, AsTable(:) => ByRow(mean))
+julia> select!(df, AsTable(:) => ByRow(mean), renamecols=false)
 3×1 DataFrame
-│ Row │ a_b_mean │
-│     │ Float64  │
-├─────┼──────────┤
-│ 1   │ 2.5      │
-│ 2   │ 3.5      │
-│ 3   │ 4.5      │
+│ Row │ a_b     │
+│     │ Float64 │
+├─────┼─────────┤
+│ 1   │ 2.5     │
+│ 2   │ 3.5     │
+│ 3   │ 4.5     │
 ```
 
 """
-select!(df::DataFrame, args...) =
-    _replace_columns!(df, select(df, args..., copycols=false))
+select!(df::DataFrame, args...; renamecols::Bool=true) =
+    _replace_columns!(df, select(df, args..., copycols=false, renamecols=renamecols))
 
 """
-    transform!(df::DataFrame, args...)
+    transform!(df::DataFrame, args...; renamecols::Bool=true)
 
 Mutate `df` in place to add columns specified by `args...` and return it.
 The result is guaranteed to have the same number of rows as `df`.
@@ -368,10 +395,11 @@ Equivalent to `select!(df, :, args...)`.
 
 See [`select!`](@ref) for detailed rules regarding accepted values for `args`.
 """
-transform!(df::DataFrame, args...) = select!(df, :, args...)
+transform!(df::DataFrame, args...; renamecols::Bool=true) =
+    select!(df, :, args..., renamecols=renamecols)
 
 """
-    select(df::AbstractDataFrame, args...; copycols::Bool=true)
+    select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true)
 
 Create a new data frame that contains columns from `df` specified by `args` and
 return it. The result is guaranteed to have the same number of rows as `df`,
@@ -479,22 +507,22 @@ julia> select(df, names(df) .=> sum .=> [:A, :B])
 │ 2   │ 6     │ 15    │
 │ 3   │ 6     │ 15    │
 
-julia> select(df, AsTable(:) => ByRow(mean))
+julia> select(df, AsTable(:) => ByRow(mean), renamecols=false)
 3×1 DataFrame
-│ Row │ a_b_mean │
-│     │ Float64  │
-├─────┼──────────┤
-│ 1   │ 2.5      │
-│ 2   │ 3.5      │
-│ 3   │ 4.5      │
+│ Row │ a_b     │
+│     │ Float64 │
+├─────┼─────────┤
+│ 1   │ 2.5     │
+│ 2   │ 3.5     │
+│ 3   │ 4.5     │
 ```
 
 """
-select(df::AbstractDataFrame, args...; copycols::Bool=true) =
-    manipulate(df, args..., copycols=copycols, keeprows=true)
+select(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
+    manipulate(df, args..., copycols=copycols, keeprows=true, renamecols=renamecols)
 
 """
-    transform(df::AbstractDataFrame, args...; copycols::Bool=true)
+    transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true)
 
 Create a new data frame that contains columns from `df` and adds columns
 specified by `args` and return it.
@@ -503,12 +531,12 @@ Equivalent to `select(df, :, args..., copycols=copycols)`.
 
 See [`select`](@ref) for detailed rules regarding accepted values for `args`.
 """
-transform(df::AbstractDataFrame, args...; copycols::Bool=true) =
-    select(df, :, args..., copycols=copycols)
+transform(df::AbstractDataFrame, args...; copycols::Bool=true, renamecols::Bool=true) =
+    select(df, :, args..., copycols=copycols, renamecols=renamecols)
 
 """
-    combine(df::AbstractDataFrame, args...)
-    combine(arg, df::AbstractDataFrame)
+    combine(df::AbstractDataFrame, args...; renamecols::Bool=true)
+    combine(arg, df::AbstractDataFrame; renamecols::Bool=true)
 
 Create a new data frame that contains columns from `df` specified by `args` and
 return it. The result can have any number of rows that is determined by the
@@ -530,42 +558,46 @@ julia> df = DataFrame(a=1:3, b=4:6)
 │ 2   │ 2     │ 5     │
 │ 3   │ 3     │ 6     │
 
-julia> combine(df, :a => sum, nrow)
+julia> combine(df, :a => sum, nrow, renamecols=false)
 1×2 DataFrame
-│ Row │ a_sum │ nrow  │
+│ Row │ a     │ nrow  │
 │     │ Int64 │ Int64 │
 ├─────┼───────┼───────┤
 │ 1   │ 6     │ 3     │
 ```
 """
-combine(df::AbstractDataFrame, args...) =
-    manipulate(df, args..., copycols=true, keeprows=false)
+combine(df::AbstractDataFrame, args...; renamecols::Bool=true) =
+    manipulate(df, args..., copycols=true, keeprows=false, renamecols=renamecols)
 
-function combine(arg, df::AbstractDataFrame)
+function combine(arg, df::AbstractDataFrame; renamecols::Bool=true)
     if nrow(df) == 0
         throw(ArgumentError("calling combine on a data frame with zero rows" *
                             " with transformation as a first argument is " *
                             "currently not supported"))
     end
-    return combine(arg, groupby(df, Symbol[]))
+    return combine(arg, groupby(df, Symbol[]), renamecols=renamecols)
 end
 
-manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool) =
-    DataFrame(_columns(df)[args], Index(_names(df)[args]),
-              copycols=copycols)
+manipulate(df::DataFrame, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    DataFrame(_columns(df)[args], Index(_names(df)[args]), copycols=copycols)
 
-function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool)
+function manipulate(df::DataFrame, c::MultiColumnIndex; copycols::Bool, keeprows::Bool,
+                    renamecols::Bool)
     if c isa AbstractVector{<:Pair}
-        return manipulate(df, c..., copycols=copycols, keeprows=keeprows)
+        return manipulate(df, c..., copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
     else
-        return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows)
+        return manipulate(df, index(df)[c], copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
     end
 end
 
-manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool) =
-    manipulate(df, [c], copycols=copycols, keeprows=keeprows)
+manipulate(df::DataFrame, c::ColumnIndex; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    manipulate(df, [c], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
 
-function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool)
+function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool, renamecols::Bool)
     cs_vec = []
     for v in cs
         if v isa AbstractVector{<:Pair}
@@ -574,7 +606,7 @@ function manipulate(df::DataFrame, cs...; copycols::Bool, keeprows::Bool)
             push!(cs_vec, v)
         end
     end
-    return _manipulate(df, [normalize_selection(index(df), c) for c in cs_vec],
+    return _manipulate(df, [normalize_selection(index(df), c, renamecols) for c in cs_vec],
                     copycols, keeprows)
 end
 
@@ -679,19 +711,22 @@ function _manipulate(df::AbstractDataFrame, normalized_cs, copycols::Bool, keepr
     return newdf
 end
 
-manipulate(dfv::SubDataFrame, ind::ColumnIndex; copycols::Bool, keeprows::Bool) =
-    manipulate(dfv, [ind], copycols=copycols, keeprows=keeprows)
+manipulate(dfv::SubDataFrame, ind::ColumnIndex; copycols::Bool, keeprows::Bool,
+           renamecols::Bool) =
+    manipulate(dfv, [ind], copycols=copycols, keeprows=keeprows, renamecols=renamecols)
 
 function manipulate(dfv::SubDataFrame, args::MultiColumnIndex;
-                 copycols::Bool, keeprows::Bool)
+                 copycols::Bool, keeprows::Bool, renamecols::Bool)
     if args isa AbstractVector{<:Pair}
-        return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows)
+        return manipulate(dfv, args..., copycols=copycols, keeprows=keeprows,
+                          renamecols=renamecols)
     else
         return copycols ? dfv[:, args] : view(dfv, :, args)
     end
 end
 
-function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool)
+function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool,
+                    renamecols::Bool)
     if copycols
         cs_vec = []
         for v in args
@@ -701,8 +736,8 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool)
                 push!(cs_vec, v)
             end
         end
-        return _manipulate(dfv, [normalize_selection(index(dfv), c) for c in cs_vec],
-                        true, keeprows)
+        return _manipulate(dfv, [normalize_selection(index(dfv), c, renamecols) for c in cs_vec],
+                           true, keeprows)
     else
         # we do not support transformations here
         # newinds contains only indexing; making it Vector{Any} avoids some compilation
@@ -719,7 +754,7 @@ function manipulate(dfv::SubDataFrame, args...; copycols::Bool, keeprows::Bool)
                     push!(seen_single_column, ind_idx)
                 end
             else
-                newind = normalize_selection(index(dfv), ind)
+                newind = normalize_selection(index(dfv), ind, renamecols)
                 if newind isa Pair
                     throw(ArgumentError("transforming and renaming columns of a " *
                                         "SubDataFrame is not allowed when `copycols=false`"))
diff --git a/src/deprecated.jl b/src/deprecated.jl
index e4686b830c..9851dfc472 100644
--- a/src/deprecated.jl
+++ b/src/deprecated.jl
@@ -21,19 +21,19 @@ function CategoricalArrays.categorical(df::AbstractDataFrame,
     end
     if cols isa AbstractVector{<:Union{AbstractString, Symbol}}
         Base.depwarn("`categorical(df, cols)` is deprecated. " *
-                     "Use `transform(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `transform(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical)
-        return transform(df, cols .=> (x -> categorical(x, compress=compress)) .=> cols)
+        return transform(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false)
     elseif cols isa Union{AbstractString, Symbol}
         Base.depwarn("`categorical(df, cols)` is deprecated. " *
-                     "Use `transform(df, cols => $categoricalstr => cols)` instead.",
+                     "Use `transform(df, cols => $categoricalstr, renamecols=false)` instead.",
                      :categorical)
-        return transform(df, cols => (x -> categorical(x, compress=compress)) => cols)
+        return transform(df, cols => (x -> categorical(x, compress=compress)), renamecols=false)
     else
         Base.depwarn("`categorical(df, cols)` is deprecated. " *
-                     "Use `transform(df, names(df, cols) .=> $categoricalstr .=> names(df, cols))` instead.",
+                     "Use `transform(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical)
-        return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)) .=> names(df, cols))
+        return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false)
     end
 end
 
@@ -49,15 +49,15 @@ function CategoricalArrays.categorical(df::AbstractDataFrame,
     if cols === nothing
         cols = Union{AbstractString, Missing}
         Base.depwarn("`categorical(df)` is deprecated. " *
-                     "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical)
     else
         Base.depwarn("`categorical(df, T)` is deprecated. " *
-                     "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical)
     end
     colsstr = names(df)[map(c -> eltype(c) <: cols, eachcol(df))]
-    return transform(df, colsstr .=> (x -> categorical(x, compress=compress)) .=> colsstr)
+    return transform(df, colsstr .=> (x -> categorical(x, compress=compress)), renamecols=false)
 end
 
 function categorical!(df::DataFrame, cols::Union{ColumnIndex, MultiColumnIndex};
@@ -70,19 +70,19 @@ function categorical!(df::DataFrame, cols::Union{ColumnIndex, MultiColumnIndex};
     end
     if cols isa AbstractVector{<:Union{AbstractString, Symbol}}
         Base.depwarn("`categorical!(df, cols)` is deprecated. " *
-                     "Use `transform!(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `transform!(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical!)
-        return transform!(df, cols .=> (x -> categorical(x, compress=compress)) .=> cols)
+        return transform!(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false)
     elseif cols isa Union{AbstractString, Symbol}
         Base.depwarn("`categorical!(df, cols)` is deprecated. " *
-                     "Use `transform!(df, cols => $categoricalstr => cols)` instead.",
+                     "Use `transform!(df, cols => $categoricalstr, renamecols=false)` instead.",
                      :categorical!)
-        return transform!(df, cols => (x -> categorical(x, compress=compress)) => cols)
+        return transform!(df, cols => (x -> categorical(x, compress=compress)), renamecols=false)
     else
         Base.depwarn("`categorical!(df, cols)` is deprecated. " *
-                     "Use `transform!(df, names(df, cols) .=> $categoricalstr .=> names(df, cols))` instead.",
+                     "Use `transform!(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical!)
-        return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)) .=> names(df, cols))
+        return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false)
     end
 end
 
@@ -97,13 +97,13 @@ function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing;
     if cols === nothing
         cols = Union{AbstractString, Missing}
         Base.depwarn("`categorical!(df)` is deprecated. " *
-                     "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform!(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `cols = names(df)[map(c -> eltype(c) <: $cols, eachcol(df))]; transform!(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical!)
     else
         Base.depwarn("`categorical!(df, T)` is deprecated. " *
-                     "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform!(df, cols .=> $categoricalstr .=> cols)` instead.",
+                     "Use `cols = names(df)[map(c -> eltype(c) <: T, eachcol(df))]; transform!(df, cols .=> $categoricalstr, renamecols=false)` instead.",
                      :categorical!)
     end
     colsstr = names(df)[map(c -> eltype(c) <: cols, eachcol(df))]
-    return transform!(df, colsstr .=> (x -> categorical(x, compress=compress)) .=> colsstr)
-end
\ No newline at end of file
+    return transform!(df, colsstr .=> (x -> categorical(x, compress=compress)), renamecols=false)
+end
diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index f48ec1ec6d..d7b1c23d86 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -227,13 +227,15 @@ const F_ARGUMENT_RULES =
     * Column transformation operations using the `Pair` notation that is described below
       and vectors of such pairs.
 
-    Transformations allowed using `Pair`s follow the rules specified
-    for [`select`](@ref) and have the form `source_cols => fun`,
-    `source_cols => fun => target_col`, or `source_col => target_col`.
-    Function `fun` is passed `SubArray` views as positional arguments for each column
-    specified to be selected, or a `NamedTuple` containing these `SubArray`s if
-    `source_cols` is an `AsTable` selector. It can return a vector or a single value
-    (defined precisely below).
+    Transformations allowed using `Pair`s follow the rules specified for
+    [`select`](@ref) and have the form `source_cols => fun`, `source_cols => fun
+    => target_col`, or `source_col => target_col`. Function `fun` is passed
+    `SubArray` views as positional arguments for each column specified to be
+    selected, or a `NamedTuple` containing these `SubArray`s if `source_cols` is
+    an `AsTable` selector. It can return a vector or a single value (defined
+    precisely below). If the target column name is generated
+    automatically, it follows the `renamecols` keyword argument according to the
+    rules described in [`select`](@ref).
 
     As a special case `nrow` or `nrow => target_col` can be passed without specifying
     input columns to efficiently calculate number of rows in each group.
@@ -272,10 +274,12 @@ const KWARG_PROCESSING_RULES =
     """
 
 """
-    combine(gd::GroupedDataFrame, args...; keepkeys::Bool=true, ungroup::Bool=true)
+    combine(gd::GroupedDataFrame, args...; keepkeys::Bool=true, ungroup::Bool=true,
+            renamecols::Bool=true)
     combine(fun::Union{Function, Type}, gd::GroupedDataFrame;
-            keepkeys::Bool=true, ungroup::Bool=true)
-    combine(pair::Pair, gd::GroupedDataFrame; keepkeys::Bool=true, ungroup::Bool=true)
+            keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
+    combine(pair::Pair, gd::GroupedDataFrame; keepkeys::Bool=true, ungroup::Bool=true,
+            renamecols::Bool=true)
 
 Apply operations to each group in a [`GroupedDataFrame`](@ref) and return the combined
 result as a `DataFrame` if `ungroup=true` or `GroupedDataFrame` if `ungroup=false`.
@@ -433,33 +437,34 @@ julia> combine(gd, AsTable(:) => Ref)
 │ 3   │ 3     │ (a = [3, 3], b = [2, 2], c = [3, 7]) │
 │ 4   │ 4     │ (a = [4, 4], b = [1, 1], c = [4, 8]) │
 
-julia> combine(gd, :, AsTable(Not(:a)) => sum)
+julia> combine(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
 8×4 DataFrame
-│ Row │ a     │ b     │ c     │ b_c_sum │
-│     │ Int64 │ Int64 │ Int64 │ Int64   │
-├─────┼───────┼───────┼───────┼─────────┤
-│ 1   │ 1     │ 2     │ 1     │ 3       │
-│ 2   │ 1     │ 2     │ 5     │ 7       │
-│ 3   │ 2     │ 1     │ 2     │ 3       │
-│ 4   │ 2     │ 1     │ 6     │ 7       │
-│ 5   │ 3     │ 2     │ 3     │ 5       │
-│ 6   │ 3     │ 2     │ 7     │ 9       │
-│ 7   │ 4     │ 1     │ 4     │ 5       │
-│ 8   │ 4     │ 1     │ 8     │ 9       │
+│ Row │ a     │ b     │ c     │ b_c   │
+│     │ Int64 │ Int64 │ Int64 │ Int64 │
+├─────┼───────┼───────┼───────┼───────┤
+│ 1   │ 1     │ 2     │ 1     │ 3     │
+│ 2   │ 1     │ 2     │ 5     │ 7     │
+│ 3   │ 2     │ 1     │ 2     │ 3     │
+│ 4   │ 2     │ 1     │ 6     │ 7     │
+│ 5   │ 3     │ 2     │ 3     │ 5     │
+│ 6   │ 3     │ 2     │ 7     │ 9     │
+│ 7   │ 4     │ 1     │ 4     │ 5     │
+│ 8   │ 4     │ 1     │ 8     │ 9     │
 ```
 """
 function combine(f::Base.Callable, gd::GroupedDataFrame;
-                 keepkeys::Bool=true, ungroup::Bool=true)
+                 keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     return combine_helper(f, gd, keepkeys=keepkeys, ungroup=ungroup,
-                          copycols=true, keeprows=false)
+                          copycols=true, keeprows=false, renamecols=renamecols)
 end
 
 combine(f::typeof(nrow), gd::GroupedDataFrame;
-        keepkeys::Bool=true, ungroup::Bool=true) =
-    combine(gd, [nrow => :nrow], keepkeys=keepkeys, ungroup=ungroup)
+        keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
+    combine(gd, [nrow => :nrow], keepkeys=keepkeys, ungroup=ungroup,
+            renamecols=renamecols)
 
 function combine(p::Pair, gd::GroupedDataFrame;
-                 keepkeys::Bool=true, ungroup::Bool=true)
+                 keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     # move handling of aggregate to specialized combine
     p_from, p_to = p
 
@@ -467,7 +472,7 @@ function combine(p::Pair, gd::GroupedDataFrame;
     # by moving to combine(::GroupedDataFrame, ::AbstractVector) method
     # note that even if length(gd) == 0 we can do this step
     if isagg(p_from => (p_to isa Pair ? first(p_to) : p_to), gd) || p_from === nrow
-        return combine(gd, [p], keepkeys=keepkeys, ungroup=ungroup)
+        return combine(gd, [p], keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols)
     end
 
     if p_from isa Tuple
@@ -479,19 +484,20 @@ function combine(p::Pair, gd::GroupedDataFrame;
         cs = p_from
     end
     return combine_helper(cs => p_to, gd, keepkeys=keepkeys, ungroup=ungroup,
-                          copycols=true, keeprows=false)
+                          copycols=true, keeprows=false, renamecols=renamecols)
 end
 
 combine(gd::GroupedDataFrame,
         cs::Union{Pair, typeof(nrow), ColumnIndex, MultiColumnIndex}...;
-        keepkeys::Bool=true, ungroup::Bool=true) =
+        keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true) =
     _combine_prepare(gd, cs..., keepkeys=keepkeys, ungroup=ungroup,
-                     copycols=true, keeprows=false)
+                     copycols=true, keeprows=false, renamecols=renamecols)
 
 function _combine_prepare(gd::GroupedDataFrame,
                           @nospecialize(cs::Union{Pair, typeof(nrow),
-                                                  ColumnIndex, MultiColumnIndex}...);
-                 keepkeys::Bool, ungroup::Bool, copycols::Bool, keeprows::Bool)
+                                        ColumnIndex, MultiColumnIndex}...);
+                          keepkeys::Bool, ungroup::Bool, copycols::Bool,
+                          keeprows::Bool, renamecols::Bool)
     cs_vec = []
     for p in cs
         if p === nrow
@@ -513,7 +519,7 @@ function _combine_prepare(gd::GroupedDataFrame,
             end
         end
     end
-    cs_norm_pre = [normalize_selection(index(parent(gd)), c) for c in cs_vec]
+    cs_norm_pre = [normalize_selection(index(parent(gd)), c, renamecols) for c in cs_vec]
     seen_cols = Set{Symbol}()
     process_vectors = false
     for v in cs_norm_pre
@@ -564,7 +570,7 @@ function _combine_prepare(gd::GroupedDataFrame,
     f = Pair[first(x) => first(last(x)) for x in cs_norm]
     nms = Symbol[last(last(x)) for x in cs_norm]
     return combine_helper(f, gd, nms, keepkeys=keepkeys, ungroup=ungroup,
-                          copycols=copycols, keeprows=keeprows)
+                          copycols=copycols, keeprows=keeprows, renamecols=renamecols)
 end
 
 function gen_groups(idx::Vector{Int})
@@ -584,11 +590,11 @@ end
 function combine_helper(f, gd::GroupedDataFrame,
                         nms::Union{AbstractVector{Symbol},Nothing}=nothing;
                         keepkeys::Bool, ungroup::Bool,
-                        copycols::Bool, keeprows::Bool)
+                        copycols::Bool, keeprows::Bool, renamecols::Bool)
     if !ungroup && !keepkeys
         throw(ArgumentError("keepkeys=false when ungroup=false is not allowed"))
     end
-    idx, valscat = _combine(f, gd, nms, copycols, keeprows)
+    idx, valscat = _combine(f, gd, nms, copycols, keeprows, renamecols)
     !keepkeys && ungroup && return valscat
     keys = groupcols(gd)
     for key in keys
@@ -1137,7 +1143,7 @@ end
 
 function _combine(f::AbstractVector{<:Pair},
                   gd::GroupedDataFrame, nms::AbstractVector{Symbol},
-                  copycols::Bool, keeprows::Bool)
+                  copycols::Bool, keeprows::Bool, renamecols::Bool)
     # here f should be normalized and in a form of source_cols => fun
     @assert all(x -> first(x) isa Union{Int, AbstractVector{Int}, AsTable}, f)
     @assert all(x -> last(x) isa Base.Callable, f)
@@ -1277,7 +1283,7 @@ function _combine(f::AbstractVector{<:Pair},
 end
 
 function _combine(fun::Base.Callable, gd::GroupedDataFrame, ::Nothing,
-                  copycols::Bool, keeprows::Bool)
+                  copycols::Bool, keeprows::Bool, renamecols::Bool)
     @assert copycols && !keeprows
     # use `similar` as `gd` might have been subsetted
     firstres = length(gd) > 0 ? fun(gd[1]) : fun(similar(parent(gd), 0))
@@ -1287,11 +1293,11 @@ function _combine(fun::Base.Callable, gd::GroupedDataFrame, ::Nothing,
 end
 
 function _combine(p::Pair, gd::GroupedDataFrame, ::Nothing,
-                  copycols::Bool, keeprows::Bool)
+                  copycols::Bool, keeprows::Bool, renamecols::Bool)
     # here p should not be normalized as we allow tabular return value from fun
     # map and combine should not dispatch here if p is isagg
     @assert copycols && !keeprows
-    source_cols, (fun, out_col) = normalize_selection(index(parent(gd)), p)
+    source_cols, (fun, out_col) = normalize_selection(index(parent(gd)), p, renamecols)
     parentdf = parent(gd)
     if source_cols isa Int
         incols = (parent(gd)[!, source_cols],)
@@ -1553,8 +1559,8 @@ function _combine_tables_with_first!(first::Union{AbstractDataFrame,
 end
 
 """
-    select(gd::GroupedDataFrame, args...;
-           copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true)
+    select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true,
+           ungroup::Bool=true, renamecols::Bool=true)
 
 Apply `args` to `gd` following the rules described in [`combine`](@ref).
 
@@ -1686,42 +1692,42 @@ julia> select(gd, :b, :c => sum) # passing columns and broadcasting
 │ 7   │ 1     │ 2     │ 19    │
 │ 8   │ 2     │ 1     │ 17    │
 
-julia> select(gd, :, AsTable(Not(:a)) => sum)
+julia> select(gd, :, AsTable(Not(:a)) => sum, renamecols=false)
 8×4 DataFrame
-│ Row │ a     │ b     │ c     │ b_c_sum │
-│     │ Int64 │ Int64 │ Int64 │ Int64   │
-├─────┼───────┼───────┼───────┼─────────┤
-│ 1   │ 1     │ 2     │ 1     │ 3       │
-│ 2   │ 1     │ 1     │ 2     │ 3       │
-│ 3   │ 1     │ 2     │ 3     │ 5       │
-│ 4   │ 2     │ 1     │ 4     │ 5       │
-│ 5   │ 2     │ 2     │ 5     │ 7       │
-│ 6   │ 1     │ 1     │ 6     │ 7       │
-│ 7   │ 1     │ 2     │ 7     │ 9       │
-│ 8   │ 2     │ 1     │ 8     │ 9       │
+│ Row │ a     │ b     │ c     │ b_c   │
+│     │ Int64 │ Int64 │ Int64 │ Int64 │
+├─────┼───────┼───────┼───────┼───────┤
+│ 1   │ 1     │ 2     │ 1     │ 3     │
+│ 2   │ 1     │ 1     │ 2     │ 3     │
+│ 3   │ 1     │ 2     │ 3     │ 5     │
+│ 4   │ 2     │ 1     │ 4     │ 5     │
+│ 5   │ 2     │ 2     │ 5     │ 7     │
+│ 6   │ 1     │ 1     │ 6     │ 7     │
+│ 7   │ 1     │ 2     │ 7     │ 9     │
+│ 8   │ 2     │ 1     │ 8     │ 9     │
 ```
 """
-select(gd::GroupedDataFrame, args...;
-       copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true) =
+select(gd::GroupedDataFrame, args...; copycols::Bool=true, keepkeys::Bool=true,
+       ungroup::Bool=true, renamecols::Bool=true) =
     _combine_prepare(gd, args..., copycols=copycols, keepkeys=keepkeys,
-                     ungroup=ungroup, keeprows=true)
+                     ungroup=ungroup, keeprows=true, renamecols=renamecols)
 
 """
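-    transform(gd::GroupedDataFrame, args...;
-              copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true)
+    transform(gd::GroupedDataFrame, args...; copycols::Bool=true,
+              keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)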
 
 An equivalent of
-`select(gd, :, args..., copycols=copycols, keepkeys=keepkeys, ungroup=ungroup)`
+`select(gd, :, args..., copycols=copycols, keepkeys=keepkeys, ungroup=ungroup, renamecols=renamecols)`
 but keeps the columns of `parent(gd)` in their original order.
 
 # See also
 
 [`groupby`](@ref), [`combine`](@ref), [`select`](@ref), [`select!`](@ref), [`transform!`](@ref)
 """
-function transform(gd::GroupedDataFrame, args...;
-                   copycols::Bool=true, keepkeys::Bool=true, ungroup::Bool=true)
+function transform(gd::GroupedDataFrame, args...; copycols::Bool=true,
+                   keepkeys::Bool=true, ungroup::Bool=true, renamecols::Bool=true)
     res = select(gd, :, args..., copycols=copycols, keepkeys=keepkeys,
-                 ungroup=ungroup)
+                 ungroup=ungroup, renamecols=renamecols)
     # res can be a GroupedDataFrame based on DataFrame or a DataFrame,
     # so parent always gives a data frame
     select!(parent(res), propertynames(parent(gd)), :)
@@ -1729,10 +1735,10 @@ function transform(gd::GroupedDataFrame, args...;
 end
 
 """
-    select!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true)
+    select!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true, renamecols::Bool=true)
 
 An equivalent of
-`select(gd, args..., copycols=false, keepkeys=true, ungroup=ungroup)`
+`select(gd, args..., copycols=false, keepkeys=true, ungroup=ungroup, renamecols=renamecols)`
 but updates `parent(gd)` in place.
 
 `gd` is updated to reflect the new rows of its updated parent.
@@ -1743,18 +1749,19 @@ using the same parent data frame they might get corrupt.
 
 [`groupby`](@ref), [`combine`](@ref), [`select`](@ref), [`transform`](@ref), [`transform!`](@ref)
 """
-function select!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true)
-    newdf = select(gd, args..., copycols=false)
+function select!(gd::GroupedDataFrame{DataFrame}, args...;
+                 ungroup::Bool=true, renamecols::Bool=true)
+    newdf = select(gd, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)
     _replace_columns!(df, newdf)
     return ungroup ? df : gd
 end
 
 """
-    transform!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true)
+    transform!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true, renamecols::Bool=true)
 
 An equivalent of
-`transform(gd, args..., copycols=false, keepkeys=true, ungroup=ungroup)`
+`transform(gd, args..., copycols=false, keepkeys=true, ungroup=ungroup, renamecols=renamecols)`
 but updates `parent(gd)` in place
 and keeps the columns of `parent(gd)` in their original order.
 
@@ -1762,8 +1769,9 @@ and keeps the columns of `parent(gd)` in their original order.
 
 [`groupby`](@ref), [`combine`](@ref), [`select`](@ref), [`select!`](@ref), [`transform`](@ref)
 """
-function transform!(gd::GroupedDataFrame{DataFrame}, args...; ungroup::Bool=true)
-    newdf = select(gd, :, args..., copycols=false)
+function transform!(gd::GroupedDataFrame{DataFrame}, args...;
+                    ungroup::Bool=true, renamecols::Bool=true)
+    newdf = select(gd, :, args..., copycols=false, renamecols=renamecols)
     df = parent(gd)
     select!(newdf, propertynames(df), :)
     _replace_columns!(df, newdf)
diff --git a/test/grouping.jl b/test/grouping.jl
index 336c84eb82..c8839c9fa9 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -1379,7 +1379,7 @@ end
         @test gd[Dict([Test.GenericString(String(k)) => v for (k, v)  in pairs(key)]...)] ≅ gd[i]
         # Out of order Dict
         @test gd[Dict([k => v for (k, v) in Iterators.reverse(pairs(key))]...)] ≅ gd[i]
-        # AbstractDict 
+        # AbstractDict
         @test gd[Test.GenericDict(Dict(key))] ≅ gd[i]
     end
 
@@ -1395,7 +1395,7 @@ end
     @test get(gd, Dict(:a => :A, :b => 1), nothing) ≅ gd[1]
     @test get(gd, Dict(:b => 1, :a => :A), nothing) ≅ gd[1]
     @test get(gd, Dict(:a => :A, :b => 3), nothing) == nothing
-    
+
     # Wrong values
     @test_throws KeyError gd[(a=:A, b=3)]
     @test_throws KeyError gd[(:A, 3)]
@@ -2839,4 +2839,29 @@ end
     end
 end
 
+@testset "renamecols=false tests" begin
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    gdf = groupby_checked(df, :a)
+
+    @test select(gdf, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+    @test_throws ArgumentError select(gdf, [] => () -> 10, renamecols=false)
+    @test transform(gdf, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
+    @test combine(gdf, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+    @test combine([:a, :b] => +, gdf, renamecols=false) == DataFrame(a=1:3, a_b=5:2:9)
+    @test combine(identity, gdf, renamecols=false) == df
+
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    gdf = groupby_checked(df, :a)
+    @test select!(gdf, :a => +, [:a, :b] => +, All() => +, renamecols=false) == df
+    @test df == DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    gdf = groupby_checked(df, :a)
+    @test transform!(gdf, :a => +, [:a, :b] => +, All() => +, renamecols=false) == df
+    @test df == DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
+end
+
 end # module
diff --git a/test/select.jl b/test/select.jl
index 99dc122322..f707a9e950 100644
--- a/test/select.jl
+++ b/test/select.jl
@@ -1285,4 +1285,25 @@ end
     @test_throws MethodError transform!(dfv, 1)
 end
 
+@testset "renamecols=false tests" begin
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    @test select(df, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+    @test_throws ArgumentError select(df, [] => () -> 10, renamecols=false)
+    @test transform(df, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
+    @test combine(df, :a => +, [:a, :b] => +, All() => +, renamecols=false) ==
+          DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+    @test combine([:a, :b] => +, df, renamecols=false) == DataFrame(a_b=5:2:9)
+    @test combine(identity, df, renamecols=false) == df
+
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    @test select!(df, :a => +, [:a, :b] => +, All() => +, renamecols=false) == df
+    @test df == DataFrame(a=1:3, a_b=5:2:9, a_b_etc=22:4:30)
+
+    df = DataFrame(a=1:3, b=4:6, c=7:9, d=10:12)
+    @test transform!(df, :a => +, [:a, :b] => +, All() => +, renamecols=false) == df
+    @test df == DataFrame(a=1:3, b=4:6, c=7:9, d=10:12, a_b=5:2:9, a_b_etc=22:4:30)
+end
+
 end # module