Skip to content

Commit 7f329eb

Browse files
committed
Merge pull request JuliaLang#110 from JuliaStats/dropna
Bug fix, and clean up of NA iterators
2 parents 07f40a1 + 663ef6b commit 7f329eb

File tree

5 files changed

+101
-104
lines changed

5 files changed

+101
-104
lines changed

src/DataArrays.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ module DataArrays
2121
DataMatrix,
2222
DataVector,
2323
dropna,
24-
each_failNA,
24+
each_failna,
2525
each_dropna,
26-
each_replaceNA,
26+
each_replacena,
2727
EachFailNA,
2828
EachDropNA,
2929
EachReplaceNA,
@@ -75,6 +75,8 @@ module DataArrays
7575
include("deprecated.jl")
7676

7777
Base.@deprecate removeNA dropna
78+
Base.@deprecate each_failNA each_failna
79+
Base.@deprecate each_replaceNA each_replacena
7880
Base.@deprecate set_levels setlevels
7981
Base.@deprecate set_levels! setlevels!
8082
end

src/abstractdataarray.jl

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -41,63 +41,129 @@ Base.done(x::AbstractDataArray, state::Integer) = state > length(x)
4141

4242
#' @description
4343
#'
44-
#' Determine if any of the entries of an AbstractArray are `NA`.
44+
#' Determine if the values of an AbstractArray are `NA`.
4545
#'
46-
#' @param a::AbstractArray{T, N} The AbstractArray whose elements will
46+
#' @param a::AbstractArray{T, N} The AbstractArray whose missingness will
4747
#' be assessed.
4848
#'
49-
#' @returns out::Bool Are any of the elements of `a` an `NA` value?
49+
#' @returns na::BitArray{N} Elementwise Boolean whether entry is missing.
5050
#'
5151
#' @examples
5252
#'
5353
#' a = [1, 2, 3]
54-
#' anyna(a)
55-
anyna(a::AbstractArray) = false # -> Bool
54+
#' isna(a)
55+
function isna(a::AbstractArray)
    # Generic fallback: report every position of `a` as not-NA, using a
    # BitArray with the same dimensions as `a`.
    dims = size(a)
    return falses(dims) # -> BitArray
end
5656

5757
#' @description
5858
#'
59-
#' Determine if all of the entries of an AbstractArray are `NA`.
59+
#' Safe and type-stable way to determine if element `i` of an
60+
#' AbstractArray is `NA`.
6061
#'
61-
#' @param a::AbstractArray{T, N} The AbstractArray whose elements will
62+
#' @param a::AbstractArray The AbstractArray whose missingness will
6263
#' be assessed.
64+
#' @param i::Real The index of the element to be checked for `NA`.
6365
#'
64-
#' @returns out::Bool Are all of the elements of `a` an `NA` value?
66+
#' @returns na::Bool Is the element `NA` or not?
6567
#'
6668
#' @examples
6769
#'
6870
#' a = [1, 2, 3]
69-
#' allna(a)
70-
allna(a::AbstractArray) = false # -> Bool
71+
#' isna(a, 1)
72+
function isna(a::AbstractArray, i::Real)
    # Generic fallback: always answers `false`; neither `a` nor `i` is inspected.
    return false # -> Bool
end
7173

7274
#' @description
7375
#'
74-
#' Determine if the values of an AbstractArray are `NA`.
76+
#' Determine if any of the entries of an AbstractArray are `NA`.
7577
#'
76-
#' @param a::AbstractArray{T, N} The AbstractArray whose missingness will
78+
#' @param a::AbstractArray{T, N} The AbstractArray whose elements will
7779
#' be assessed.
7880
#'
79-
#' @returns na::BitArray{N} Elementwise Boolean whether entry is missing.
81+
#' @returns out::Bool Are any of the elements of `a` an `NA` value?
8082
#'
8183
#' @examples
8284
#'
8385
#' a = [1, 2, 3]
84-
#' isna(a)
85-
isna(a::AbstractArray) = falses(size(a)) # -> BitArray
86+
#' anyna(a)
87+
function anyna(a::AbstractArray)
    # Generic fallback: always answers `false` without inspecting `a`.
    return false # -> Bool
end
8688

8789
#' @description
8890
#'
89-
#' Safe and type-stable way to determine if element `i` of an
90-
#' AbstractArray is `NA`.
91+
#' Determine if all of the entries of an AbstractArray are `NA`.
9192
#'
92-
#' @param a::AbstractArray The AbstractArray whose missingness will
93+
#' @param a::AbstractArray{T, N} The AbstractArray whose elements will
9394
#' be assessed.
94-
#' @param i::Integer The index of the element to be checked for `NA`.
9595
#'
96-
#' @returns na::Bool Is the element `NA` or not?
96+
#' @returns out::Bool Are all of the elements of `a` an `NA` value?
9797
#'
9898
#' @examples
9999
#'
100100
#' a = [1, 2, 3]
101-
#' isna(a, 1)
102-
isna(a::AbstractArray, i::Real) = false # -> Bool
101+
#' allna(a)
102+
function allna(a::AbstractArray)
    # Generic fallback: always answers `false` without inspecting `a`.
    return false # -> Bool
end
103+
104+
#' @description
105+
#'
106+
#' NO-OP: Turn a Vector into a Vector. See dropna(dv::DataVector) for
107+
#' rationale.
108+
#'
109+
#' @param v::AbstractVector{T} Vector that will be copied into a new Vector.
110+
#'
111+
#' @returns v::AbstractVector{T} Copy containing all of the values of `v`.
112+
#'
113+
#' @examples
114+
#'
115+
#' v = [1, 2, 3, 4]
116+
#' v = dropna(v)
117+
function dropna(v::AbstractVector)
    # Nothing is dropped here; the caller simply receives a fresh copy of `v`.
    duplicate = copy(v)
    return duplicate # -> AbstractVector
end
118+
119+
# Iterators
120+
# TODO: Use values()
121+
# Use DataValueIterator type?
122+
123+
# Iterator over the entries of `da` that throws an NAException as soon as it
# is asked for an entry that is NA.
type EachFailNA{T}
    da::AbstractDataArray{T}
end

# Wrap `da` in an EachFailNA iterator.
each_failna{T}(da::AbstractDataArray{T}) = EachFailNA(da)

# The iteration state is the index of the next entry to visit.
Base.start(itr::EachFailNA) = 1
Base.done(itr::EachFailNA, ind::Integer) = ind > length(itr.da)

# Return `(value, next_state)` for entry `ind`, or throw NAException if that
# entry is NA.
function Base.next(itr::EachFailNA, ind::Integer)
    # Query missingness by position with `isna(da, ind)`, as EachDropNA and
    # EachReplaceNA do, instead of extracting the element first; this avoids
    # fetching `itr.da[ind]` twice for non-NA entries.
    if isna(itr.da, ind)
        throw(NAException())
    end
    return (itr.da[ind], ind + 1)
end
136+
137+
# Iterator over the entries of `da` that skips every position for which
# `isna(da, i)` is true.
type EachDropNA{T}
    da::AbstractDataArray{T}
end

# Wrap `da` in an EachDropNA iterator.
function each_dropna{T}(da::AbstractDataArray{T})
    return EachDropNA(da)
end

# Return the first index strictly greater than `ind` whose entry is not NA,
# or `length(da) + 1` when no such index exists.
function _next_nonna_ind{T}(da::AbstractDataArray{T}, ind::Int)
    candidate = ind + 1
    while candidate <= length(da)
        if !isna(da, candidate)
            break
        end
        candidate += 1
    end
    return candidate
end

# The iteration state is always the index of a non-NA entry (or one past the
# end), so `next` can read the entry at the state directly.
function Base.start(itr::EachDropNA)
    return _next_nonna_ind(itr.da, 0)
end

function Base.done(itr::EachDropNA, ind::Int)
    return length(itr.da) < ind
end

function Base.next(itr::EachDropNA, ind::Int)
    value = itr.da[ind]
    following = _next_nonna_ind(itr.da, ind)
    return (value, following)
end
103153

154+
# Iterator over the entries of `da` that yields `replacement` in place of
# every entry for which `isna(da, i)` is true.
type EachReplaceNA{S, T}
    da::AbstractDataArray{S}
    replacement::T
end

# Build an EachReplaceNA after converting `replacement` to `eltype(da)`.
function each_replacena(da::AbstractDataArray, replacement::Any)
    converted = convert(eltype(da), replacement)
    return EachReplaceNA(da, converted)
end

# Curried form: returns a one-argument function that applies
# `each_replacena(x, replacement)` to its argument.
each_replacena(replacement::Any) = x -> each_replacena(x, replacement)

# The iteration state is the index of the next entry to visit.
function Base.start(itr::EachReplaceNA)
    return 1
end

function Base.done(itr::EachReplaceNA, ind::Integer)
    return ind > length(itr.da)
end

function Base.next(itr::EachReplaceNA, ind::Integer)
    if isna(itr.da, ind)
        return (itr.replacement, ind + 1)
    end
    return (itr.da[ind], ind + 1)
end

src/dataarray.jl

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -364,21 +364,6 @@ function array{T}(da::DataArray{T}, replacement::Any) # -> Array{T}
364364
return array(da, convert(T, replacement))
365365
end
366366

367-
#' @description
368-
#'
369-
#' NO-OP: Turn a Vector into a Vector. See dropna(dv::DataVector) for
370-
#' rationale.
371-
#'
372-
#' @param v::Vector{T} Vector that will be converted to a Vector.
373-
#'
374-
#' @returns v::Vector{T} Vector containing all of the values of `v`.
375-
#'
376-
#' @examples
377-
#'
378-
#' v = [1, 2, 3, 4]
379-
#' v = dropna(v)
380-
dropna(v::AbstractVector) = copy(v) # -> AbstractVector
381-
382367
#' @description
383368
#'
384369
#' Turn a DataVector into a Vector. Drop any NA's.
@@ -397,63 +382,6 @@ dropna(v::AbstractVector) = copy(v) # -> AbstractVector
397382
#' v = dropna(dv)
398383
# Indexing `dv.data` with the negated NA mask already allocates a new Vector,
# so the result never aliases `dv.data` and no extra `copy` is needed.
dropna(dv::DataVector) = dv.data[!dv.na] # -> Vector
399384

400-
# Iterators
401-
# TODO: Use values()
402-
# Use DataValueIterator type?
403-
404-
type EachFailNA{T}
405-
da::AbstractDataArray{T}
406-
end
407-
each_failNA{T}(da::AbstractDataArray{T}) = EachFailNA(da)
408-
Base.start(itr::EachFailNA) = 1
409-
function Base.done(itr::EachFailNA, ind::Integer)
410-
return ind > length(itr.da)
411-
end
412-
function Base.next(itr::EachFailNA, ind::Integer)
413-
if isna(itr.da[ind])
414-
throw(NAException())
415-
else
416-
(itr.da[ind], ind + 1)
417-
end
418-
end
419-
420-
type EachDropNA{T}
421-
da::AbstractDataArray{T}
422-
end
423-
each_dropna{T}(da::AbstractDataArray{T}) = EachDropNA(da)
424-
Base.start(itr::EachDropNA) = 1
425-
function Base.done(itr::EachDropNA, ind::Integer)
426-
return ind > length(itr.da)
427-
end
428-
function Base.next(itr::EachDropNA, ind::Integer)
429-
while ind <= length(itr.da) && isna(itr.da[ind])
430-
ind += 1
431-
end
432-
(itr.da[ind], ind + 1)
433-
end
434-
435-
type EachReplaceNA{S, T}
436-
da::AbstractDataArray{S}
437-
replacement_val::T
438-
end
439-
function each_replaceNA(da::AbstractDataArray, val::Any)
440-
EachReplaceNA(da, convert(eltype(da), val))
441-
end
442-
function each_replaceNA(val::Any)
443-
x -> each_replaceNA(x, val)
444-
end
445-
Base.start(itr::EachReplaceNA) = 1
446-
function Base.done(itr::EachReplaceNA, ind::Integer)
447-
return ind > length(itr.da)
448-
end
449-
function Base.next(itr::EachReplaceNA, ind::Integer)
450-
if isna(itr.da[ind])
451-
(itr.replacement_val, ind + 1)
452-
else
453-
(itr.da[ind], ind + 1)
454-
end
455-
end
456-
457385
#' @description
458386
#'
459387
#' Determine if the entries of an DataArray are `NA`.

src/pooleddataarray.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,5 +791,6 @@ function dropna{T}(pdv::PooledDataVector{T})
791791
res[total] = pdv.pool[pdv.refs[i]]
792792
end
793793
end
794-
return res[1:total]
794+
resize!(res, total)
795+
return res
795796
end

test/nas.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,18 @@ module TestNAs
4949

5050
dv = DataArray([1, 2, 3], bitpack([false, false, false]))
5151

52-
dv = DataArray([1, 2, 3], [false, false, false])
52+
dv = DataArray([1:6], fill(false, 6))
5353

5454
a = dropna(dv)
55-
for v in each_failNA(dv); end
56-
@test collect(each_failNA(dv)) == a
55+
for v in each_failna(dv); end
56+
@test collect(each_failna(dv)) == a
5757
@test collect(each_dropna(dv)) == a
58-
@test collect(each_replaceNA(dv, 4)) == a
58+
@test collect(each_replacena(dv, 4)) == a
5959

60-
dv[1] = NA
60+
dv[[1, 2, end]] = NA
6161

6262
a = dropna(dv)
63-
@test_throws NAException for v in each_failNA(dv); end
63+
@test_throws NAException for v in each_failna(dv); end
6464
@test collect(each_dropna(dv)) == a
65-
@test collect(each_replaceNA(dv, 4)) == [4, a]
65+
@test collect(each_replacena(dv, 4)) == [4, 4, a, 4]
6666
end

0 commit comments

Comments
 (0)