JuliaData · nalimilan · Nov 8, 2018 · Nov 1, 2018 · Nov 2, 2018 · Nov 2, 2018
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -1035,7 +1035,9 @@ function _vcat(dfs::AbstractVector{<:AbstractDataFrame})
     length(header) == 0 && return DataFrame()
     cols = Vector{AbstractVector}(undef, length(header))
     for (i, name) in enumerate(header)
-        data = [df[name] for df in dfs]
+        # TODO: replace with commented out code after getindex deprecation
+        # data = [df[name] for df in dfs]
+        data = [df isa DataFrame ? df[name] : df[:, name] for df in dfs]
         lens = map(length, data)
         T = mapreduce(eltype, promote_type, data)
         cols[i] = Tables.allocatecolumn(T, sum(lens))

diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -1387,3 +1387,249 @@ import Base: delete!, insert!, merge!
 
 import Base: setindex!
 @deprecate setindex!(df::DataFrame, x::Nothing, col_ind::Int) deletecols!(df, col_ind)
+
+# TODO: START: Deprecations to be removed after getindex deprecation period finishes
+
+function Base.iterate(itr::Cols{SubDataFrame}, st=1)
+    st > length(itr.df) && return nothing
+    # after deprecation we will return a view, now we materialize a vector
+    return (itr.df[:, st], st + 1)
+end
+
+# after deprecation we will return a view, now we materialize a vector
+Base.get(df::SubDataFrame, key::Any, default::Any) = haskey(df, key) ? df[:, key] : default
+
+function completecases(df::SubDataFrame)
+    res = trues(size(df, 1))
+    for i in 1:size(df, 2)
+        _nonmissing!(res, df[:, i])
+    end
+    res
+end
+
+function completecases(df::SubDataFrame, col::Union{Integer, Symbol})
+    res = trues(size(df, 1))
+    _nonmissing!(res, df[:, col])
+    res
+end
+
+completecases(df::SubDataFrame, cols::AbstractVector) =
+    completecases(df[:, cols])
+
+nonunique(df::SubDataFrame, cols::Union{Real, Symbol}) = nonunique(df[:, [cols]])
+nonunique(df::SubDataFrame, cols::Any) = nonunique(df[:, cols])
+
+function Base.hash(df::SubDataFrame, h::UInt)
+    h += hashdf_seed
+    h += hash(size(df))
+    for i in 1:size(df, 2)
+        h = hash(df[:, i], h)
+    end
+    return h
+end
+
+function Base.iterate(itr::DFColumnIterator{<:SubDataFrame}, j=1)
+    j > size(itr.df, 2) && return nothing
+    return ((_names(itr.df)[j], itr.df[:, j]), j + 1)
+end
+
+Base.getindex(itr::DFColumnIterator{<:SubDataFrame}, j) = itr.df[:, j]
+
+function showrowindices(io::IO,
+                        df::SubDataFrame,
+                        rowindices::AbstractVector{Int},
+                        maxwidths::Vector{Int},
+                        leftcol::Int,
+                        rightcol::Int) # -> Void
+    rowmaxwidth = maxwidths[end]
+
+    for i in rowindices
+        # Print row ID
+        @printf io "│ %d" i
+        padding = rowmaxwidth - ndigits(i)
+        for _ in 1:padding
+            write(io, ' ')
+        end
+        print(io, " │ ")
+        # Print DataFrame entry
+        for j in leftcol:rightcol
+            strlen = 0
+            if isassigned(df[:, j], i)
+                s = df[i, j]
+                strlen = ourstrwidth(s)
+                if ismissing(s)
+                    printstyled(io, s, color=:light_black)
+                elseif s === nothing
+                    strlen = 0
+                else
+                    ourshowcompact(io, s)
+                end
+            else
+                strlen = ourstrwidth(Base.undef_ref_str)
+                ourshowcompact(io, Base.undef_ref_str)
+            end
+            padding = maxwidths[j] - strlen
+            for _ in 1:padding
+                write(io, ' ')
+            end
+            if j == rightcol
+                if i == rowindices[end]
+                    print(io, " │")
+                else
+                    print(io, " │\n")
+                end
+            else
+                print(io, " │ ")
+            end
+        end
+    end
+    return
+end
+
+function showrows(io::IO,
+                  df::SubDataFrame,
+                  rowindices1::AbstractVector{Int},
+                  rowindices2::AbstractVector{Int},
+                  maxwidths::Vector{Int},
+                  splitcols::Bool = false,
+                  allcols::Bool = false,
+                  rowlabel::Symbol = :Row,
+                  displaysummary::Bool = true) # -> Void
+    ncols = size(df, 2)
+
+    if isempty(rowindices1)
+        if displaysummary
+            println(io, summary(df))
+        end
+        return
+    end
+
+    rowmaxwidth = maxwidths[ncols + 1]
+    chunkbounds = getchunkbounds(maxwidths, splitcols, displaysize(io)[2])
+    nchunks = allcols ? length(chunkbounds) - 1 : min(length(chunkbounds) - 1, 1)
+
+    header = displaysummary ? summary(df) : ""
+    if !allcols && length(chunkbounds) > 2
+        header *= ". Omitted printing of $(chunkbounds[end] - chunkbounds[2]) columns"
+    end
+    println(io, header)
+
+    for chunkindex in 1:nchunks
+        leftcol = chunkbounds[chunkindex] + 1
+        rightcol = chunkbounds[chunkindex + 1]
+
+        # Print column names
+        @printf io "│ %s" rowlabel
+        padding = rowmaxwidth - ourstrwidth(rowlabel)
+        for itr in 1:padding
+            write(io, ' ')
+        end
+        print(io, " │ ")
+        for j in leftcol:rightcol
+            s = _names(df)[j]
+            ourshowcompact(io, s)
+            padding = maxwidths[j] - ourstrwidth(s)
+            for itr in 1:padding
+                write(io, ' ')
+            end
+            if j == rightcol
+                print(io, " │\n")
+            else
+                print(io, " │ ")
+            end
+        end
+
+        # Print column types
+        print(io, "│ ")
+        padding = rowmaxwidth
+        for itr in 1:padding
+            write(io, ' ')
+        end
+        print(io, " │ ")
+        for j in leftcol:rightcol
+            s = compacttype(eltype(df[:, j]), maxwidths[j])
+            printstyled(io, s, color=:light_black)
+            padding = maxwidths[j] - ourstrwidth(s)
+            for itr in 1:padding
+                write(io, ' ')
+            end
+            if j == rightcol
+                print(io, " │\n")
+            else
+                print(io, " │ ")
+            end
+        end
+
+        # Print table bounding line
+        write(io, '├')
+        for itr in 1:(rowmaxwidth + 2)
+            write(io, '─')
+        end
+        write(io, '┼')
+        for j in leftcol:rightcol
+            for itr in 1:(maxwidths[j] + 2)
+                write(io, '─')
+            end
+            if j < rightcol
+                write(io, '┼')
+            else
+                write(io, '┤')
+            end
+        end
+        write(io, '\n')
+
+        # Print main table body, potentially in two abbreviated sections
+        showrowindices(io,
+                       df,
+                       rowindices1,
+                       maxwidths,
+                       leftcol,
+                       rightcol)
+
+        if !isempty(rowindices2)
+            print(io, "\n⋮\n")
+            showrowindices(io,
+                           df,
+                           rowindices2,
+                           maxwidths,
+                           leftcol,
+                           rightcol)
+        end
+
+        # Print newlines to separate chunks
+        if chunkindex < nchunks
+            print(io, "\n\n")
+        end
+    end
+
+    return
+end
+
+function hcat!(df1::DataFrame, df2::SubDataFrame; makeunique::Bool=false)
+    u = add_names(index(df1), index(df2), makeunique=makeunique)
+    for i in 1:length(u)
+        # after deprecation period of getindex we will store a view here
+        df1[u[i]] = df2[:, i]
+    end
+    return df1
+end
+
+function Base.append!(df1::DataFrame, df2::SubDataFrame)
+    _names(df1) == _names(df2) || error("Column names do not match")
+    nrows, ncols = size(df1)
+    try
+        for j in 1:ncols
+            append!(df1[j], df2[:, j])
+        end
+    catch err
+        # Undo changes in case of error
+        for j in 1:ncols
+            resize!(df1[j], nrows)
+        end
+        rethrow(err)
+    end
+    return df1
+end
+
+
+# TODO: END:   Deprecations to be removed after getindex deprecation period finishes
diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -79,7 +79,8 @@ combine(map(d -> mean(skipmissing(d[:c])), gd))
 """
 function groupby(df::AbstractDataFrame, cols::Vector;
                  sort::Bool = false, skipmissing::Bool = false)
-    sdf = df[cols]
+    # TODO: decide if we want this or retain `sdf` as a SubDataFrame after deprecation
+    sdf = df isa SubDataFrame ? df[:, cols] : df[cols]
     df_groups = group_rows(sdf, skipmissing)
     # sort the groups
     if sort

diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl
@@ -182,7 +182,7 @@ ncol(sdf::SubDataFrame) = length(index(sdf))
 
 function Base.getindex(sdf::SubDataFrame, colind::ColumnIndex)
     Base.depwarn("`sdf[colind]` will create a view of `parent(sdf)[colind]` in the future." *
-                 " Use sdf[:, [colind]]` to get a `DataFrame`.", :getindex)
+                 " Use sdf[:, colind]` to get a freshly allocated vector.", :getindex)
     return parent(sdf)[rows(sdf), colind]
 end
 

diff --git a/test/cat.jl b/test/cat.jl
@@ -85,7 +85,10 @@ module TestCat
         df = DataFrame(Int, 4, 3)
 
         # Assignment of rows
-        df[1, :] = df[1, :]
+        # TODO: re-enable after getindex deprecation period
+        # this has to make sure that DataFrameRow can be on RHS
+        # df[1, :] = df[1, :]
+        df[1, :] = df[1:1, :]
         df[1:2, :] = df[1:2, :]
         df[[true,false,false,true], :] = df[2:3, :]
 
@@ -108,7 +111,10 @@ module TestCat
         df[:x3] = 2
 
         # assignment of subtables
-        df[1, 1:2] = df[2, 2:3]
+        # TODO: re-enable after getindex deprecation period
+        # this has to make sure that DataFrameRow can be on RHS
+        # df[1, 1:2] = df[2, 2:3]
+        df[1, 1:2] = df[2:2, 2:3]
         df[1:2, 1:2] = df[2:3, 2:3]
         df[[true,false,false,true], 2:3] = df[1:2,1:2]
 
@@ -278,7 +284,7 @@ module TestCat
         err = @test_throws ArgumentError vcat(df1, df2, df3, df4, df1, df2, df3, df4, df1, df2, df3, df4)
         @test err.value.msg == "column(s) E and F are missing from argument(s) 1, 5 and 9, column(s) B are missing from argument(s) 2, 6 and 10, and column(s) F are missing from argument(s) 3, 7 and 11"
     end
-    x = view(DataFrame(A = Vector{Union{Missing, Int}}(1:3)), 2)
+    x = view(DataFrame(A = Vector{Union{Missing, Int}}(1:3)), 2:2, :)
     y = DataFrame(A = 4:5)
     @test vcat(x, y) == DataFrame(A = [2, 4, 5])
 end
diff --git a/test/data.jl b/test/data.jl
@@ -27,7 +27,9 @@ module TestData
         @test df6[2, :C] == "two"
         @test df6[:B] ≅ [1, 2, missing, 4]
         @test size(df6[[2,3]], 2) == 2
-        @test size(df6[2,:], 1) == 1
+        # TODO: reenable after getinex deprecation period
+        # @test size(df6[2,:], 1) == 1
+        @test size(df6[2:2,:], 1) == 1
         @test size(df6[[1, 3], [1, 3]]) == (2, 2)
         @test size(df6[1:2, 1:2]) == (2, 2)
         @test size(head(df6,2)) == (2, 3)
@@ -75,16 +77,20 @@ module TestData
 
         #test_group("constructors")
         # single index is rows
-        sdf6a = view(df6, 1)
-        sdf6b = view(df6, 2:3)
-        sdf6c = view(df6, [true, false, true, false])
+        sdf6a = view(df6, 1:1, :)
+        sdf6b = view(df6, 2:3, :)
+        sdf6c = view(df6, [true, false, true, false], :)
         @test size(sdf6a) == (1,3)
-        sdf6d = view(df6, [1,3], :B)
+        sdf6d = view(df6, [1,3], [:B])
         @test size(sdf6d) == (2,1)
-        sdf6e = view(df6, 0x01)
+        sdf6e = view(df6, [0x01], :)
         @test size(sdf6e) == (1,3)
-        sdf6f = view(df6, UInt64[1, 2])
+        sdf6f = view(df6, UInt64[1, 2], :)
         @test size(sdf6f) == (2,3)
+        # TODO: add those tests after getindex deprecation period
+        # sdf6g = view(df6, [1,3], :B)
+        # sdf6h = view(df6, 1, :B)
+        # sdf6i = view(df6, 1, [:B])
 
         #test_group("ref")
         @test sdf6a[1,2] == 4
@@ -104,18 +110,18 @@ module TestData
         #test_group("groupby")
         gd = groupby(df7, :d1)
         @test length(gd) == 2
-        @test gd[1][:d2] ≅ d2[d1 .== 1]
-        @test gd[2][:d2] ≅ d2[d1 .== 2]
-        @test gd[1][:d3] == d3[d1 .== 1]
-        @test gd[2][:d3] == d3[d1 .== 2]
+        @test gd[1][:, :d2] ≅ d2[d1 .== 1]
+        @test gd[2][:, :d2] ≅ d2[d1 .== 2]
+        @test gd[1][:, :d3] == d3[d1 .== 1]
+        @test gd[2][:, :d3] == d3[d1 .== 2]
 
         g1 = groupby(df7, [:d1, :d2])
         g2 = groupby(df7, [:d2, :d1])
-        @test g1[1][:d3] == g2[1][:d3]
+        @test g1[1][:, :d3] == g2[1][:, :d3]
 
         res = 0.0
         for x in g1
-            res += sum(x[:d1])
+            res += sum(x[:, :d1])
         end
         @test res == sum(df7[:d1])
 
@@ -216,10 +222,11 @@ module TestData
         d1s = stackdf(d1, [:a, :b])
         @test d1s[1][[1,24]] == [:a, :b]
         @test d1s[2][[1,24]] == [1, 4]
-        @test_broken d1s[1][true]
-        @test_broken d1s[1][1.0]
-        @test_broken d1s[2][true]
-        @test_broken d1s[2][1.0]
+        # TODO: enable those tests after deprecation period of getindex
+        # @test d1s[1][true]
+        # @test d1s[1][1.0]
+        # @test d1s[2][true]
+        # @test d1s[2][1.0]
 
         # Those tests check indexing RepeatedVector/StackedVector by a vector
         d1s[1][trues(24)] == d1s[1]