From 930ebaf2170a77e3f3117b94f3810f2f47f751b2 Mon Sep 17 00:00:00 2001
From: Josh Whittemore
Date: Wed, 6 Feb 2019 16:17:59 -0800
Subject: [PATCH] Add module to make iris dataset available.

---
Reviewer notes (text between the three-dash line above and the first
"diff --git" line is commentary; it is not part of the commit message and
is not applied to any file):

 * Project.toml: adds a `DelimitedFiles` entry. src/data/iris.jl is the
   file in this patch that does `using DelimitedFiles` and calls `readdlm`.
 * src/data/Data.jl: includes iris.jl and exports the `Iris` module.
 * src/data/iris.jl (new file): defines `module Iris` with `load()`,
   `labels()` and `features()`.
   - `load()` returns immediately when `deps("iris.data")` already exists;
     otherwise it logs an info message and calls `download_and_verify`
     with the UCI URL, the destination path and a 64-character hex string
     (presumably the expected checksum of the download -- confirm against
     `download_and_verify`, which is defined outside this patch).
   - `cache_prefix` is the empty string, so the URL is used as written;
     the cache.julialang.org prefix is left commented out.
   - `labels()` and `features()` each call `load()` and then parse the
     file again with `readdlm(..., ',')`; nothing is kept between calls.
   - `labels()` returns the last column as a `Vector{String}`;
     `features()` returns columns 1:4, transposed, as a `Matrix{Float64}`.

 Project.toml     |  1 +
 src/data/Data.jl |  3 ++
 src/data/iris.jl | 88 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)
 create mode 100644 src/data/iris.jl

diff --git a/Project.toml b/Project.toml
index 331d683991..71ff487797 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
 Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
+DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Juno = "e5e0dc1b-0480-54bc-9374-aad01c23163d"
diff --git a/src/data/Data.jl b/src/data/Data.jl
index ab78f4163c..d7cd0303ba 100644
--- a/src/data/Data.jl
+++ b/src/data/Data.jl
@@ -39,4 +39,7 @@ include("tree.jl")
 include("sentiment.jl")
 using .Sentiment
 
+include("iris.jl")
+export Iris
+
 end
diff --git a/src/data/iris.jl b/src/data/iris.jl
new file mode 100644
index 0000000000..c432f8473f
--- /dev/null
+++ b/src/data/iris.jl
@@ -0,0 +1,88 @@
+
+"""
+
+    Iris
+
+Fisher's classic iris dataset.
+
+Measurements from 3 different species of iris: setosa, versicolor and
+virginica. There are 50 examples of each species.
+
+There are 4 measurements for each example: sepal length, sepal width, petal
+length and petal width. The measurements are in centimeters.
+
+The module retrieves the data from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/iris).
+
+"""
+module Iris
+
+using DelimitedFiles
+using ..Data: deps, download_and_verify
+
+const cache_prefix = ""
+
+# Uncomment if the iris.data file is cached to cache.julialang.org.
+# const cache_prefix = "https://cache.julialang.org/"
+
+function load()
+  isfile(deps("iris.data")) && return
+
+  @info "Downloading iris dataset."
+  download_and_verify("$(cache_prefix)https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
+                      deps("iris.data"),
+                      "6f608b71a7317216319b4d27b4d9bc84e6abd734eda7872b71a458569e2656c0")
+end
+
+"""
+
+    labels()
+
+Get the labels of the iris dataset, a 150 element array of strings listing the
+species of each example.
+
+```jldoctest
+julia> labels = Flux.Data.Iris.labels();
+
+julia> summary(labels)
+"150-element Array{String,1}"
+
+julia> labels[1]
+"Iris-setosa"
+```
+"""
+function labels()
+  load()
+  iris = readdlm(deps("iris.data"), ',')
+  Vector{String}(iris[1:end, end])
+end
+
+"""
+
+    features()
+
+Get the features of the iris dataset. This is a 4x150 matrix of Float64
+elements. It has a row for each feature (sepal length, sepal width,
+petal length, petal width) and a column for each example.
+
+```jldoctest
+julia> features = Flux.Data.Iris.features();
+
+julia> summary(features)
+"4×150 Array{Float64,2}"
+
+julia> features[:, 1]
+4-element Array{Float64,1}:
+ 5.1
+ 3.5
+ 1.4
+ 0.2
+```
+"""
+function features()
+  load()
+  iris = readdlm(deps("iris.data"), ',')
+  Matrix{Float64}(iris[1:end, 1:4]')
+end
+end
+
+