Skip to content

Commit c55e58f

Browse files
authored
Merge pull request #24982 from JuliaLang/nl/skipmissing
Add skipmissing() to skip missing values
2 parents 7e2f904 + db5334a commit c55e58f

File tree

7 files changed

+199
-55
lines changed

7 files changed

+199
-55
lines changed

base/exports.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ export
859859
# missing values
860860
ismissing,
861861
missing,
862+
skipmissing,
862863

863864
# time
864865
sleep,

base/missing.jl

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ end
1818
showerror(io::IO, ex::MissingException) =
1919
print(io, "MissingException: ", ex.msg)
2020

21+
# nonmissingtype(T): strip `Missing` from the type `T`.
#
# Catch-all: a type given without a `Missing` component is returned unchanged.
function nonmissingtype(::Type{T}) where {T}
    return T
end
# `Union{T, Missing}` yields its non-missing part `T`.
function nonmissingtype(::Type{Union{T, Missing}}) where {T}
    return T
end
# `Any` stays `Any`.
function nonmissingtype(::Type{Any})
    return Any
end
# `Missing` alone has no non-missing part, hence the empty union.
function nonmissingtype(::Type{Missing})
    return Union{}
end
25+
2126
# Promotion rules involving `Missing`.
#
# `Missing` combined with a type `T` promotes to `Union{T, Missing}`.
function promote_rule(::Type{Missing}, ::Type{T}) where {T}
    return Union{T, Missing}
end
# `Union{S, Missing}` combined with `T`: promote the non-missing parts together and
# keep `Missing` in the result.
function promote_rule(::Type{Union{S,Missing}}, ::Type{T}) where {T,S}
    promoted = promote_type(T, S)
    return Union{promoted, Missing}
end
# `Any` combined with any type stays `Any`.
function promote_rule(::Type{Any}, ::Type{T}) where {T}
    return Any
end
@@ -116,3 +121,81 @@ function float(A::AbstractArray{Union{T, Missing}}) where {T}
116121
convert(AbstractArray{Union{U, Missing}}, A)
117122
end
118123
# An array whose element type is exactly `Missing` is returned as-is (no copy is made).
function float(A::AbstractArray{Missing})
    return A
end
124+
125+
"""
126+
skipmissing(itr)
127+
128+
Return an iterator over the elements in `itr` skipping [`missing`](@ref) values.
129+
130+
Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values in
131+
`itr`. Note that even if `itr` is a multidimensional array, the result will always
132+
be a `Vector` since it is not possible to remove missings while preserving dimensions
133+
of the input.
134+
135+
# Examples
136+
```jldoctest
137+
julia> sum(skipmissing([1, missing, 2]))
138+
3
139+
140+
julia> collect(skipmissing([1, missing, 2]))
141+
2-element Array{Int64,1}:
142+
1
143+
2
144+
145+
julia> collect(skipmissing([1 missing; 2 missing]))
146+
2-element Array{Int64,1}:
147+
1
148+
2
149+
150+
```
151+
"""
152+
function skipmissing(itr)
    # Only wrap `itr` here; the missing values are dropped while iterating the wrapper.
    return SkipMissing(itr)
end
153+
154+
# Wrapper returned by `skipmissing`: holds the wrapped iterable in `x`; the iteration
# methods defined for this type skip the `missing` values found in `x`.
struct SkipMissing{T}
    x::T
end
# The length is declared unknown, regardless of what the wrapped iterable reports.
iteratorsize(::Type{<:SkipMissing}) = SizeUnknown()
# Whether the element type is known is delegated to the wrapped iterable type.
iteratoreltype(::Type{SkipMissing{T}}) where {T} = iteratoreltype(T)
# `eltype` is defined on the *type*, as the iteration interface expects, so that both
# `eltype(SkipMissing{T})` and `eltype(itr)` (through the generic instance fallback
# `eltype(x) = eltype(typeof(x))`) return the wrapped element type without `Missing`.
# A method on instances only would leave `eltype(typeof(itr))` falling back to `Any`.
eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T))
160+
# Generic fallback for arbitrary iterables. A value cannot be read twice from a general
# iterator, so the next non-missing element found by start() or next() is carried in
# the iterator state as `(value, inner_state)`. This is type-unstable: the carried value
# is `missing` when the input has no (further) non-missing element.
@inline function Base.start(itr::SkipMissing)
    inner = start(itr.x)
    found = missing
    # Advance until the wrapped iterator is exhausted or a non-missing value shows up.
    @inbounds while true
        (done(itr.x, inner) || !(found isa Missing)) && break
        found, inner = next(itr.x, inner)
    end
    return (found, inner)
end
172+
@inline function Base.done(itr::SkipMissing, state)
    # Finished only when no non-missing value is pending in the state and the wrapped
    # iterator itself is exhausted.
    pending, inner = state
    return ismissing(pending) && done(itr.x, inner)
end
173+
@inline function Base.next(itr::SkipMissing, state)
    # `current` is the value found by the previous start()/next() call; it is returned
    # now, while the following non-missing value is looked up and stored in the state.
    current, inner = state
    upcoming = missing
    @inbounds while true
        (done(itr.x, inner) || !(upcoming isa Missing)) && break
        upcoming, inner = next(itr.x, inner)
    end
    return (current, (upcoming, inner))
end
181+
# Specialized path for AbstractArray. Since x[i] can be read twice, the state only
# needs to be an index state: the entry is inspected once while searching for the next
# non-missing element and read again in next() to return it. This avoids the type
# instability of the generic fallback.
#
# Starting from index state `s`, return the state positioned at the next non-missing
# entry of `x`, or the exhausted state if there is none.
@inline function _next_nonmissing_ind(x::AbstractArray, s)
    inds = eachindex(x)
    @inbounds while !done(inds, s)
        i, advanced = next(inds, s)
        if x[i] isa Missing
            s = advanced
        else
            break
        end
    end
    return s
end
193+
@inline function Base.start(itr::SkipMissing{<:AbstractArray})
    # Initial state: the index state of the first non-missing entry, if any.
    arr = itr.x
    return _next_nonmissing_ind(arr, start(eachindex(arr)))
end
195+
@inline function Base.done(itr::SkipMissing{<:AbstractArray}, state)
    # The state is an index state of `eachindex(itr.x)`; defer to that iterator.
    return done(eachindex(itr.x), state)
end
197+
@inline function Base.next(itr::SkipMissing{<:AbstractArray}, state)
    arr = itr.x
    i, following = next(eachindex(arr), state)
    # The type assertion records that the entry at `i` is expected to be non-missing.
    @inbounds elem = arr[i]::eltype(itr)
    # Move the state forward to the next non-missing entry before returning.
    return (elem, _next_nonmissing_ind(arr, following))
end

0 commit comments

Comments
 (0)