Skip to content

Commit

Permalink
ENH: Add sample function for DataFrames
Browse files Browse the repository at this point in the history
  • Loading branch information
s-celles committed Jun 14, 2016
1 parent 3400fbc commit 0dc6c21
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export @~,
readtable,
rename!,
rename,
sample,
showcols,
stack,
stackdf,
Expand Down Expand Up @@ -111,7 +112,9 @@ for (dir, filename) in [
("statsmodels", "statsmodel.jl"),

("", "RDA.jl"),
("", "deprecated.jl")
("", "deprecated.jl"),

("other", "sample.jl"),
]

include(joinpath(dir, filename))
Expand Down
29 changes: 29 additions & 0 deletions src/other/sample.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import StatsBase: sample

"""
    sample(df::AbstractDataFrame; replace=true, ordered=false)

Draw a one-row random sample from `df`.

Shorthand for `sample(df, 1; replace=replace, ordered=ordered)`; both keyword
arguments are passed through unchanged.
"""
function sample(df::AbstractDataFrame; replace::Bool=true, ordered::Bool=false)
    return sample(df, 1; replace=replace, ordered=ordered)
end

"""
    sample(df[, N]; replace=true, ordered=false)

Return a random sample of `N` rows from the data frame `df`.

Row indices are drawn from `1:size(df, 1)` with `StatsBase.sample`, and the rows at
those indices are returned with all columns kept.

# Keywords
- `replace::Bool=true`: forwarded to `StatsBase.sample`.
- `ordered::Bool=false`: forwarded to `StatsBase.sample`.

# Example
```
julia> using RDatasets
julia> iris = dataset("datasets", "iris")
julia> srand(1)
julia> sample(iris, 5)
5×5 DataFrames.DataFrame
│ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │ Species │
│ 1 │ 5.0 │ 2.0 │ 3.5 │ 1.0 │ "versicolor" │
│ 2 │ 6.2 │ 2.9 │ 4.3 │ 1.3 │ "versicolor" │
│ 3 │ 6.7 │ 3.1 │ 4.7 │ 1.5 │ "versicolor" │
│ 4 │ 5.5 │ 2.3 │ 4.0 │ 1.3 │ "versicolor" │
│ 5 │ 5.8 │ 2.7 │ 5.1 │ 1.9 │ "virginica" │
```
"""
function sample(df::AbstractDataFrame, N::Integer; replace::Bool=true, ordered::Bool=false)
    # Sample row positions first, then index the data frame once with them.
    rows = sample(1:size(df, 1), N; replace=replace, ordered=ordered)
    return df[rows, :]
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ my_tests = ["utils.jl",
"iteration.jl",
"duplicates.jl",
"show.jl",
"statsmodel.jl"]
"statsmodel.jl",
"sample.jl"]

println("Running tests:")

Expand Down
10 changes: 10 additions & 0 deletions test/sample.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Tests for `sample` on data frames.
# The module is named after this file (test/sample.jl) so it does not clash with the
# module used by test/utils.jl.
module TestSample
using Base.Test
using DataFrames

df = DataFrame(A=1:10,B=11:20)

# Fixed seed: the expected rows below depend on the exact RNG stream, so nothing that
# consumes random numbers may run between `srand` and this first `sample` call.
srand(1)
df_sample = sample(df, 5)
@test df_sample[:A] == [1,8,7,4,2]
@test df_sample[:B] == [11,18,17,14,12]

# The sample has the requested number of rows and keeps every column.
@test size(df_sample) == (5, 2)

# Without an explicit N, a single row is drawn.
@test size(sample(df)) == (1, 2)

# Sampling every row without replacement must yield each row exactly once.
df_perm = sample(df, 10, replace=false)
@test sort(df_perm[:A]) == collect(1:10)
end

0 comments on commit 0dc6c21

Please sign in to comment.