Skip to content
This repository has been archived by the owner on May 4, 2019. It is now read-only.

Finalize API for basic statistics functions #32

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/DataArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module DataArrays
include("extras.jl")
include("grouping.jl")
include("statistics.jl")
include("stats.jl")
include("predicates.jl")
include("literals.jl")
end
14 changes: 2 additions & 12 deletions src/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,8 @@ const bit_operators = [:(Base.(:&)),
:(Base.(:|)),
:(Base.(:$))]

# Quoted references (expressions, not function objects) to functions that take
# a single vector argument. Entries come from `Base` and `Stats`.
# NOTE(review): how this list is consumed is not visible here -- presumably it
# drives generation of DataArray methods; confirm against the rest of the file.
const unary_vector_operators = [
    :(Base.minimum), :(Base.maximum),
    :(Base.prod), :(Base.sum),
    :(Base.mean), :(Base.median),
    :(Base.std), :(Base.var),
    :(Stats.mad), :(Base.norm),
    :(Stats.skewness), :(Stats.kurtosis)
]
# Quoted references (expressions, not function objects) to functions that take
# a single vector argument.
# NOTE(review): how this list is consumed is not visible here -- presumably it
# drives generation of DataArray methods; confirm against the rest of the file.
const unary_vector_operators = [:(Stats.mad), :(Base.norm)]

# TODO: dist, iqr

Expand Down
170 changes: 170 additions & 0 deletions src/stats.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Arithmetic mean of the non-NA entries of `da`, accumulated in floating point.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown as soon as
#            an NA entry is encountered; when `true` NA entries are ignored.
#
# Returns the running total divided by the number of non-NA entries, so an
# input without any usable entry evaluates `0.0 / 0`.
function Base.mean{T <: Real}(da::DataArray{T};
                              skipna::Bool = false)
    total = 0.0
    nobs = 0
    for idx in 1:length(da)
        if !da.na[idx]
            total += da.data[idx]
            nobs += 1
        elseif !skipna
            throw(NAException())
        end
    end
    return total / nobs
end

# Median of `da`, delegated to `median` on a converted copy of the data.
#
# Keywords:
#   skipna - when `true` the input is passed through `removeNA` first;
#            otherwise it is passed through `array`.
#
# NOTE(review): `removeNA` presumably drops NA entries and `array` presumably
# raises when an NA is present -- both helpers are defined elsewhere; confirm.
function Base.median{T <: Real}(da::DataArray{T};
                                skipna::Bool = false)
    vals = skipna ? removeNA(da) : array(da)
    return median(vals)
end

# Sample variance of the non-NA entries of `da` (two-pass algorithm).
#
# The mean is computed first via `mean(da, skipna = skipna)`; the squared
# deviations from it are then summed and divided by `n - 1`, where `n` is the
# number of non-NA entries.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown if an NA
#            entry is encountered; when `true` NA entries are ignored.
function Base.var{T <: Real}(da::DataArray{T};
                             skipna::Bool = false)
    center = mean(da, skipna = skipna)
    sumsq = 0.0
    nobs = 0
    for idx in 1:length(da)
        if da.na[idx]
            skipna || throw(NAException())
            continue
        end
        dev = da.data[idx] - center
        sumsq += dev * dev
        nobs += 1
    end
    return sumsq / (nobs - 1)
end

# Sample standard deviation of the non-NA entries of `da`.
#
# Defined as the square root of `var(da, skipna = skipna)` so that the
# variance computation lives in exactly one place; NA handling (including the
# `NAException` raised when `skipna` is `false` and an NA is present) is
# whatever `var` does.
#
# Keywords:
#   skipna - forwarded unchanged to `var`.
function Base.std{T <: Real}(da::DataArray{T};
                             skipna::Bool = false)
    return sqrt(var(da, skipna = skipna))
end

# Smallest non-NA entry of `da`.
#
# The search starts from `typemax(T)`, so an input with no non-NA entries
# returns `typemax(T)`.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown as soon as
#            an NA entry is encountered; when `true` NA entries are ignored.
function Base.minimum{T <: Real}(da::DataArray{T};
                                 skipna::Bool = false)
    smallest = typemax(T)
    for idx in 1:length(da)
        if da.na[idx]
            skipna || throw(NAException())
        else
            smallest = min(smallest, da.data[idx])
        end
    end
    return smallest
end

# Largest non-NA entry of `da`.
#
# The search starts from `typemin(T)`, so an input with no non-NA entries
# returns `typemin(T)`.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown as soon as
#            an NA entry is encountered; when `true` NA entries are ignored.
function Base.maximum{T <: Real}(da::DataArray{T};
                                 skipna::Bool = false)
    largest = typemin(T)
    for idx in 1:length(da)
        if da.na[idx]
            skipna || throw(NAException())
        else
            largest = max(largest, da.data[idx])
        end
    end
    return largest
end

# Product of the non-NA entries of `da`.
#
# Accumulation starts from `one(T)`, which is also the result when there are
# no non-NA entries.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown as soon as
#            an NA entry is encountered; when `true` NA entries are ignored.
function Base.prod{T <: Real}(da::DataArray{T};
                              skipna::Bool = false)
    acc = one(T)
    for idx in 1:length(da)
        if !da.na[idx]
            acc *= da.data[idx]
        elseif !skipna
            throw(NAException())
        end
    end
    return acc
end

# Sum of the non-NA entries of `da`.
#
# Accumulation starts from `zero(T)`, which is also the result when there are
# no non-NA entries.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown as soon as
#            an NA entry is encountered; when `true` NA entries are ignored.
function Base.sum{T <: Real}(da::DataArray{T};
                             skipna::Bool = false)
    acc = zero(T)
    for idx in 1:length(da)
        if !da.na[idx]
            acc += da.data[idx]
        elseif !skipna
            throw(NAException())
        end
    end
    return acc
end

# Skewness of the non-NA entries of `da`: the empirical 3rd centered moment
# divided by the empirical 2nd centered moment raised to the power 1.5.
# Both moments are normalised by `n`, the number of non-NA entries.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown if an NA
#            entry is encountered; when `true` NA entries are ignored.
#   m      - value the data are centered on; defaults to
#            `mean(da, skipna = skipna)`.
function Stats.skewness{T <: Real}(da::DataArray{T};
                                   skipna::Bool = false,
                                   m::Real = mean(da, skipna = skipna))
    nobs = 0
    sum2 = 0.0 # running sum of squared deviations
    sum3 = 0.0 # running sum of cubed deviations
    for idx in 1:length(da)
        if da.na[idx]
            skipna || throw(NAException())
            continue
        end
        dev = da.data[idx] - m
        devsq = dev * dev
        sum2 += devsq
        sum3 += devsq * dev
        nobs += 1
    end
    moment3 = sum3 / nobs
    moment2 = sum2 / nobs
    return moment3 / (moment2^1.5)
end

# Excess kurtosis of the non-NA entries of `da`: the empirical 4th centered
# moment divided by the squared empirical 2nd centered moment, minus 3.0.
# Both moments are normalised by `n`, the number of non-NA entries.
#
# Keywords:
#   skipna - when `false` (the default) an `NAException` is thrown if an NA
#            entry is encountered; when `true` NA entries are ignored.
#   m      - value the data are centered on; defaults to
#            `mean(da, skipna = skipna)`.
function Stats.kurtosis{T <: Real}(da::DataArray{T};
                                   skipna::Bool = false,
                                   m::Real = mean(da, skipna = skipna))
    nobs = 0
    sum2 = 0.0 # running sum of squared deviations
    sum4 = 0.0 # running sum of 4th-power deviations
    for idx in 1:length(da)
        if da.na[idx]
            skipna || throw(NAException())
            continue
        end
        dev = da.data[idx] - m
        devsq = dev * dev
        sum2 += devsq
        sum4 += devsq * devsq
        nobs += 1
    end
    moment4 = sum4 / nobs
    moment2 = sum2 / nobs
    return (moment4 / (moment2^2)) - 3.0
end