JuliaData · bkamins · Jan 31, 2021 · Sep 6, 2020 · Sep 7, 2020 · Sep 8, 2020
diff --git a/NEWS.md b/NEWS.md
@@ -20,6 +20,8 @@
   ([#2573](https://github.com/JuliaData/DataFrames.jl/pull/2573))
 * add `subset` and `subset!` functions that allow to subset rows
   ([#2496](https://github.com/JuliaData/DataFrames.jl/pull/2496))
+* `names` now allows passing a predicate as a column selector
+  ([#2417](https://github.com/JuliaData/DataFrames.jl/pull/2417))
 
 ## Deprecated
 

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -68,12 +68,16 @@ Return a freshly allocated `Vector{String}` of names of columns contained in `df
 
 If `cols` is passed then restrict returned column names to those matching the
 selector (this is useful in particular with regular expressions, `Cols`, `Not`, and `Between`).
-`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR)
-or a `Type`, in which case columns whose `eltype` is a subtype of `cols` are returned.
+`cols` can be:
+* any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR)
+* a `Type`, in which case names of columns whose `eltype` is a subtype of `cols`
+  are returned
+* a `Function` predicate taking the column name as a string and returning `true`
+  for columns that should be kept
 
 See also [`propertynames`](@ref) which returns a `Vector{Symbol}`.
 """
-Base.names(df::AbstractDataFrame) = names(index(df))
+Base.names(df::AbstractDataFrame, cols::Colon=:) = names(index(df))
 
 function Base.names(df::AbstractDataFrame, cols)
     nms = _names(index(df))
@@ -84,6 +88,7 @@ end
 
 Base.names(df::AbstractDataFrame, T::Type) =
     [String(n) for (n, c) in pairs(eachcol(df)) if eltype(c) <: T]
+Base.names(df::AbstractDataFrame, fun::Function) = filter!(fun, names(df))
 
 # _names returns Vector{Symbol} without copying
 _names(df::AbstractDataFrame) = _names(index(df))

diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -263,7 +263,7 @@ Base.@propagate_inbounds Base.setindex!(r::DataFrameRow, value, idx) =
 
 index(r::DataFrameRow) = getfield(r, :colindex)
 
-Base.names(r::DataFrameRow) = names(index(r))
+Base.names(r::DataFrameRow, cols::Colon=:) = names(index(r))
 
 function Base.names(r::DataFrameRow, cols)
     nms = _names(index(r))
@@ -272,6 +272,10 @@ function Base.names(r::DataFrameRow, cols)
     return [string(nms[i]) for i in idxs]
 end
 
+Base.names(r::DataFrameRow, T::Type) =
+    [String(n) for n in _names(r) if eltype(parent(r)[!, n]) <: T]
+Base.names(r::DataFrameRow, fun::Function) = filter!(fun, names(r))
+
 _names(r::DataFrameRow) = view(_names(parent(r)), parentcols(index(r), :))
 
 Base.haskey(r::DataFrameRow, key::Bool) =

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
@@ -282,9 +282,9 @@ function Base.isequal(gd1::GroupedDataFrame, gd2::GroupedDataFrame)
         all(x -> isequal(x...), zip(gd1, gd2))
 end
 
-Base.names(gd::GroupedDataFrame) = names(gd.parent)
-Base.names(gd::GroupedDataFrame, cols) = names(gd.parent, cols)
-_names(gd::GroupedDataFrame) = _names(gd.parent)
+Base.names(gd::GroupedDataFrame) = names(parent(gd))
+Base.names(gd::GroupedDataFrame, cols) = names(parent(gd), cols)
+_names(gd::GroupedDataFrame) = _names(parent(gd))
 
 function DataFrame(gd::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true)
     if !copycols

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -1925,16 +1925,34 @@ end
     @test_throws ArgumentError push!(df, "a")
 end
 
-@testset "names for Type" begin
-    df = DataFrame(a1 = 1:3, a2 = [1, missing, 3],
-                   b1 = 1.0:3.0, b2 = [1.0, missing, 3.0],
-                   c1 = '1':'3', c2 = ['1', missing, '3'])
-    @test names(df, Int) == ["a1"]
-    @test names(df, Union{Missing, Int}) == ["a1", "a2"]
-    @test names(df, Real) == ["a1", "b1"]
-    @test names(df, Union{Missing, Real}) == ["a1", "a2", "b1", "b2"]
-    @test names(df, Any) == names(df)
-    @test names(df, Union{Char, Float64, Missing}) == ["b1", "b2", "c1", "c2"]
+@testset "names for Type, predicate + standard tests of cols" begin
+    df_long = DataFrame(a1 = 1:3, a2 = [1, missing, 3],
+                        b1 = 1.0:3.0, b2 = [1.0, missing, 3.0],
+                        c1 = '1':'3', c2 = ['1', missing, '3'], x=1:3)
+    for x in (df_long[:, Not(end)], @view(df_long[:, Not(end)]),
+              groupby(df_long[:, Not(end)], :a1), groupby(@view(df_long[:, Not(end)]), :a1),
+              eachrow(df_long[:, Not(end)]), eachrow(@view(df_long[:, Not(end)])),
+              eachcol(df_long[:, Not(end)]), eachcol(@view(df_long[:, Not(end)])),
+              df_long[1, Not(end)])
+        @test names(x, 1) == ["a1"]
+        @test names(x, "a1") == ["a1"]
+        @test names(x, :a1) == ["a1"]
+        @test names(x, [2, 1]) == ["a2", "a1"]
+        @test names(x, ["a2", "a1"]) == ["a2", "a1"]
+        @test names(x, [:a2, :a1]) == ["a2", "a1"]
+        @test names(x, Int) == ["a1"]
+        @test names(x, Union{Missing, Int}) == ["a1", "a2"]
+        @test names(x, Real) == ["a1", "b1"]
+        @test names(x, Union{Missing, Real}) == ["a1", "a2", "b1", "b2"]
+        @test names(x, Any) == names(x)
+        @test isempty(names(x, BigInt))
+        @test names(x, Union{Char, Float64, Missing}) == ["b1", "b2", "c1", "c2"]
+        @test names(x, startswith("a")) == ["a1", "a2"]
+        @test names(x, :) == names(x)
+        @test names(x, <("a2")) == ["a1"]
+
+        @test_throws TypeError names(x, x -> 1)
+    end
 end
 
 end # module