JuliaData · bkamins · Jan 26, 2020 · Jan 3, 2020 · Jan 6, 2020 · Jan 6, 2020
diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md
@@ -48,17 +48,21 @@ filter!
 flatten
 hcat
 insertcols!
+length
 mapcols
 names
+ndims
 nonunique
 nrow
 ncol
+order
 rename!
 rename
 repeat
 select
 select!
 show
+size
 sort
 sort!
 unique!

diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md
@@ -45,7 +45,7 @@ julia> df = DataFrame(A = 1:4, B = ["M", "F", "F", "M"])
 
 ```
 
-Columns can be directly (i.e. without copying) accessed via `df.col` or `df[!, :col]`. The latter syntax is more flexible as it allows passing a variable holding the name of the column, and not only a literal name. Note that column names are symbols (`:col` or `Symbol("col")`) rather than strings (`"col"`). Columns can also be accessed using an integer index specifying their position. 
+Columns can be directly (i.e. without copying) accessed via `df.col` or `df[!, :col]`. The latter syntax is more flexible as it allows passing a variable holding the name of the column, and not only a literal name. Note that column names are symbols (`:col` or `Symbol("col")`) rather than strings (`"col"`). Columns can also be accessed using an integer index specifying their position.
 
 Since `df[!, :col]` does not make a copy, changing the elements of the column vector returned by this syntax will affect the values stored in the original `df`. To get a copy of the column use `df[:, :col]`: changing the vector returned by this syntax does not change `df`.
 
@@ -218,6 +218,33 @@ SQLite.load!(df, db, "dataframe_table")
 df = df |> @map({a=_.a + 1, _.b}) |> DataFrame
 ```
 
+A particularly common case of a collection that supports the
+[Tables.jl](https://github.com/JuliaData/Tables.jl) interface is
+a vector of `NamedTuple`s:
+```
+julia> v = [(a=1,b=2), (a=3,b=4)]
+2-element Array{NamedTuple{(:a, :b),Tuple{Int64,Int64}},1}:
+ (a = 1, b = 2)
+ (a = 3, b = 4)
+
+julia> df = DataFrame(v)
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 2     │
+│ 2   │ 3     │ 4     │
+```
+You can also easily convert a data frame back to a vector of `NamedTuple`s:
+```
+julia> using Tables
+
+julia> Tables.rowtable(df)
+2-element Array{NamedTuple{(:a, :b),Tuple{Int64,Int64}},1}:
+ (a = 1, b = 2)
+ (a = 3, b = 4)
+```
+
 ## Working with Data Frames
 
 ### Examining the Data

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -272,6 +272,26 @@ rename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
 rename(df::AbstractDataFrame, args...) = rename!(copy(df), args...)
 rename(f::Function, df::AbstractDataFrame) = rename!(f, copy(df))
 
+"""
+    size(df::AbstractDataFrame, [dim])
+
+Return a tuple containing the number of rows and columns of `df`.
+Optionally a dimension `dim` can be specified, where `1` corresponds to rows
+and `2` corresponds to columns.
+
+See also: [`nrow`](@ref), [`ncol`](@ref)
+
+# Examples
+```julia
+julia> df = DataFrame(a=1:3, b='a':'c');
+
+julia> size(df)
+(3, 2)
+
+julia> size(df, 1)
+3
+```
+"""
 Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
 function Base.size(df::AbstractDataFrame, i::Integer)
     if i == 1
@@ -289,6 +309,12 @@ Base.lastindex(df::AbstractDataFrame) = ncol(df)
 Base.lastindex(df::AbstractDataFrame, i::Integer) = last(axes(df, i))
 Base.axes(df::AbstractDataFrame, i::Integer) = Base.OneTo(size(df, i))
 
+"""
+    ndims(::AbstractDataFrame)
+    ndims(::Type{<:AbstractDataFrame})
+
+Return the number of dimensions of a data frame, which is always `2`.
+"""
 Base.ndims(::AbstractDataFrame) = 2
 Base.ndims(::Type{<:AbstractDataFrame}) = 2
 

diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl
@@ -19,7 +19,51 @@ struct UserColOrdering{T<:ColumnIndex}
     kwargs
 end
 
-# This is exported, and lets a user define orderings for a particular column
+"""
+    order(col::ColumnIndex; kwargs...)
+
+Specify sorting order for a column `col` in a data frame.
+`kwargs` can be `lt`, `by`, `rev`, and `order` with values
+following the rules defined in [`sort!`](@ref).
+
+See also: [`sort!`](@ref), [`sort`](@ref)
+
+# Examples
+```jldoctest
+julia> df = DataFrame(x = [-3, -1, 0, 2, 4], y = 1:5)
+5×2 DataFrame
+│ Row │ x     │ y     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ -3    │ 1     │
+│ 2   │ -1    │ 2     │
+│ 3   │ 0     │ 3     │
+│ 4   │ 2     │ 4     │
+│ 5   │ 4     │ 5     │
+
+julia> sort(df, order(:x, rev=true))
+5×2 DataFrame
+│ Row │ x     │ y     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 4     │ 5     │
+│ 2   │ 2     │ 4     │
+│ 3   │ 0     │ 3     │
+│ 4   │ -1    │ 2     │
+│ 5   │ -3    │ 1     │
+
+julia> sort(df, order(:x, by=abs))
+5×2 DataFrame
+│ Row │ x     │ y     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 0     │ 3     │
+│ 2   │ -1    │ 2     │
+│ 3   │ 2     │ 4     │
+│ 4   │ -3    │ 1     │
+│ 5   │ 4     │ 5     │
+```
+"""
 order(col::T; kwargs...) where {T<:ColumnIndex} = UserColOrdering{T}(col, kwargs)
 
 # Allow getting the column even if it is not wrapped in a UserColOrdering

diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -162,10 +162,53 @@ Base.view(r::DataFrameRow, cols::Union{AbstractVector, Regex, Not, Between, All}
     DataFrameRow(parent(r), row(r), parentcols(index(r), cols))
 Base.view(r::DataFrameRow, ::Colon) = r
 
+"""
+    size(dfr::DataFrameRow, [dim])
+
+Return a 1-tuple containing the number of elements of `dfr`.
+If an optional dimension `dim` is specified, it must be `1`, and the number of elements
+is returned directly as a number.
+
+See also: [`length`](@ref)
+
+# Examples
+```julia
+julia> dfr = DataFrame(a=1:3, b='a':'c')[1, :];
+
+julia> size(dfr)
+(2,)
+
+julia> size(dfr, 1)
+2
+```
+"""
 Base.size(r::DataFrameRow) = (length(index(r)),)
 Base.size(r::DataFrameRow, i) = size(r)[i]
+
+"""
+    length(dfr::DataFrameRow)
+
+Return the number of elements of `dfr`.
+
+See also: [`size`](@ref)
+
+# Examples
+```julia
+julia> dfr = DataFrame(a=1:3, b='a':'c')[1, :];
+
+julia> length(dfr)
+2
+```
+"""
 Base.length(r::DataFrameRow) = size(r, 1)
-Base.ndims(r::DataFrameRow) = 1
+
+"""
+    ndims(::DataFrameRow)
+    ndims(::Type{<:DataFrameRow})
+
+Return the number of dimensions of a data frame row, which is always `1`.
+"""
+Base.ndims(::DataFrameRow) = 1
 Base.ndims(::Type{<:DataFrameRow}) = 1
 
 Base.lastindex(r::DataFrameRow) = length(r)