Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

[compat]
Documenter = "~0.22"
Documenter = "0.23"
11 changes: 2 additions & 9 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
using Documenter, CSV

# Build the documentation for the `CSV` module with Documenter.
# NOTE(review): `makedocs` and `deploydocs` each appear twice below (a multi-line
# call followed by a one-line call). This looks like the before/after lines of a
# diff shown together — confirm that only one call of each is meant to remain.
makedocs(
modules = [CSV],
sitename = "CSV.jl",
pages = ["Home" => "index.md"]
)
makedocs(modules = [CSV], sitename = "CSV.jl")

# Hand the built docs to Documenter's `deploydocs` for the repository named in `repo`.
deploydocs(
repo = "github.com/JuliaData/CSV.jl.git",
target = "build"
)
deploydocs(repo = "github.com/JuliaData/CSV.jl.git")
14 changes: 7 additions & 7 deletions src/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ end
end
end

@inline function parserow(row, TR::Val{transpose}, ncols, typemap, tapes, startpos, buf, pos, len, positions, pool, refs, rowsguess, rowoffset, types, flags, debug, options::Parsers.Options{ignorerepeated}, coloptions, ::Type{customtypes}) where {transpose, ignorerepeated, customtypes}
@inline function parserow(row, TR::Val{transpose}, ncols, typemap, tapes, startpos, buf, pos::A, len, positions, pool, refs, rowsguess, rowoffset, types, flags, debug, options::B, coloptions::C, ::Type{customtypes}) where {transpose, A, B, C, customtypes}
for col = 1:ncols
if transpose
@inbounds pos = positions[col]
Expand Down Expand Up @@ -782,7 +782,7 @@ function detect(tapes, buf, pos, len, options, row, rowoffset, col, typemap, poo
return pos + tlen, code
end

function parseint!(flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, types, flags)
function parseint!(flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, types, flags)::Tuple{Int64, Int16}
x, code, vpos, vlen, tlen = Parsers.xparse(Int64, buf, pos, len, options)
if code > 0
if !Parsers.sentinel(code)
Expand Down Expand Up @@ -822,7 +822,7 @@ function parseint!(flag, tape, tapes, buf, pos, len, options, row, rowoffset, co
return pos + tlen, code
end

function parsevalue!(::Type{type}, flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, types, flags) where {type}
function parsevalue!(::Type{type}, flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, types, flags)::Tuple{Int64, Int16} where {type}
x, code, vpos, vlen, tlen = Parsers.xparse(type, buf, pos, len, options)
if code > 0
if !Parsers.sentinel(code)
Expand Down Expand Up @@ -853,7 +853,7 @@ function parsevalue!(::Type{type}, flag, tape, tapes, buf, pos, len, options, ro
return pos + tlen, code
end

function parsestring!(flag, tape, buf, pos, len, options, row, rowoffset, col, types, flags)
function parsestring!(flag, tape, buf, pos, len, options, row, rowoffset, col, types, flags)::Tuple{Int64, Int16}
x, code, vpos, vlen, tlen = Parsers.xparse(String, buf, pos, len, options)
setposlen!(tape, row, code, vpos, vlen)
if Parsers.invalidquotedfield(code)
Expand All @@ -869,7 +869,7 @@ function parsestring!(flag, tape, buf, pos, len, options, row, rowoffset, col, t
return pos + tlen, code
end

function parsestring2!(flag, tape, buf, pos, len, options, row, rowoffset, col, types, flags)
function parsestring2!(flag, tape, buf, pos, len, options, row, rowoffset, col, types, flags)::Tuple{Int64, Int16}
x, code, vpos, vlen, tlen = Parsers.xparse(String, buf, pos, len, options)
if Parsers.invalidquotedfield(code)
# this usually means parsing is borked because of an invalidly quoted field, hard error
Expand All @@ -886,7 +886,7 @@ function parsestring2!(flag, tape, buf, pos, len, options, row, rowoffset, col,
return pos + tlen, code
end

function parsemissing!(buf, pos, len, options, row, rowoffset, col)
function parsemissing!(buf, pos, len, options, row, rowoffset, col)::Tuple{Int64, Int16}
x, code, vpos, vlen, tlen = Parsers.xparse(String, buf, pos, len, options)
if Parsers.invalidquotedfield(code)
# this usually means parsing is borked because of an invalidly quoted field, hard error
Expand Down Expand Up @@ -920,7 +920,7 @@ end
return ret
end

function parsepooled!(flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, rowsguess, pool, refs, types, flags)
function parsepooled!(flag, tape, tapes, buf, pos, len, options, row, rowoffset, col, rowsguess, pool, refs, types, flags)::Tuple{Int64, Int16}
x, code, vpos, vlen, tlen = Parsers.xparse(String, buf, pos, len, options)
if Parsers.invalidquotedfield(code)
# this usually means parsing is borked because of an invalidly quoted field, hard error
Expand Down
2 changes: 1 addition & 1 deletion src/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ getdf(x::AbstractDict{Int}, nm, i) = haskey(x, i) ? x[i] : nothing
end
end
for i in todrop
flags[i] = WILLDROP
flags[i] |= WILLDROP
end
debug && println("computed types are: $types")
pool = pool === true ? 1.0 : pool isa Float64 ? pool : 0.0
Expand Down
92 changes: 76 additions & 16 deletions src/rows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# no automatic type inference is done, but types are allowed to be passed
# for as many columns as desired; `CSV.detect(row, i)` can also be used to
# use the same inference logic used in `CSV.File` for determining a cell's typed value
struct Rows{transpose, O, IO, T}
struct Rows{transpose, O, O2, IO, T, V}
name::String
names::Vector{Symbol} # only includes "select"ed columns
finaltypes::Vector{Type} # only includes "select"ed columns
Expand All @@ -17,11 +17,12 @@ struct Rows{transpose, O, IO, T}
len::Int
limit::Int64
options::O # Parsers.Options
coloptions::Union{Nothing, Vector{Parsers.Options}}
coloptions::O2 # Union{Nothing, Vector{Parsers.Options}}
customtypes::T
positions::Vector{Int64}
reusebuffer::Bool
tapes::Vector{AbstractVector}
tapes::Vector{AbstractVector} # for parsing, allocated once and used for each iteration
values::Vector{V} # once values are parsed, put in values; allocated on each iteration if reusebuffer=false
lookup::Dict{Symbol, Int}
end

Expand Down Expand Up @@ -140,13 +141,14 @@ function Rows(source;

h = Header(source, header, normalizenames, datarow, skipto, footerskip, limit, transpose, comment, use_mmap, ignoreemptylines, false, select, drop, missingstrings, missingstring, delim, ignorerepeated, quotechar, openquotechar, closequotechar, escapechar, dateformat, dateformats, decimal, truestrings, falsestrings, type, types, typemap, categorical, pool, lazystrings, strict, silencewarnings, debug, parsingdebug, true)
tapes = allocate(1, h.cols, h.types, h.flags)
values = all(x->x == Union{String, Missing}, h.types) && lazystrings ? Vector{PosLen}(undef, h.cols) : Vector{Any}(undef, h.cols)
finaltypes = copy(h.types)
columnmap = [i for i = 1:h.cols]
deleteat!(h.names, h.todrop)
deleteat!(finaltypes, h.todrop)
deleteat!(columnmap, h.todrop)
lookup = Dict(nm=>i for (i, nm) in enumerate(h.names))
return Rows{transpose, typeof(h.options), typeof(h.buf), typeof(h.customtypes)}(
return Rows{transpose, typeof(h.options), typeof(h.coloptions), typeof(h.buf), typeof(h.customtypes), eltype(values)}(
h.name,
h.names,
finaltypes,
Expand All @@ -166,6 +168,7 @@ function Rows(source;
h.positions,
reusebuffer,
tapes,
values,
lookup,
)
end
Expand All @@ -179,33 +182,90 @@ Base.IteratorSize(::Type{<:Rows}) = Base.SizeUnknown()
const EMPTY_TYPEMAP = Dict{Type, Type}()
const EMPTY_REFS = RefPool[]

@inline function Base.iterate(r::Rows{transpose}, (pos, len, row)=(r.datapos, r.len, 1)) where {transpose}
# Copy the single parsed value held in `tapes[i]` into `values[i]` for a column
# whose tape type is one of the custom types described by the tuple type `T`.
#
# Arguments:
#   T      - tuple type; for each field `k`, `fieldtype(fieldtype(T, k), 1)` is a
#            tape type that the generated code tests `tapes[i]` against
#   values - destination vector, written at index `i`
#   tapes  - vector of per-column tapes; only `tapes[i][1]` is read
#   i      - column index
#
# In the generated branch an unrolled `isa` chain is emitted, one test per field
# of `T`; a tape that matches none of them raises an error. The non-generated
# fallback performs the same store without any type test.
@inline function setcustom!(::Type{T}, values, tapes, i) where {T}
    if @generated
        block = Expr(:block)
        # Last statement of the generated body: reached only if no test matched.
        push!(block.args, quote
            error("CSV.jl code-generation error, unexpected column type: $(typeof(tape))")
        end)
        # `k` walks the fields of `T` at generation time. The `i` inside the quoted
        # code is the runtime column-index argument, not this loop variable.
        for k = 1:fieldcount(T)
            vec = fieldtype(T, k)
            pushfirst!(block.args, quote
                if tape isa $(fieldtype(vec, 1))
                    @inbounds values[i] = tape[1]
                    return
                end
            end)
        end
        # First statement of the generated body: fetch the tape for column `i`.
        # This must index with the argument `i`; no `col` exists in this scope.
        pushfirst!(block.args, quote
            @inbounds tape = tapes[i]
        end)
        pushfirst!(block.args, Expr(:meta, :inline))
        return block
    else
        @inbounds tape = tapes[i]
        @inbounds values[i] = tape[1]
        return
    end
end

# Iteration protocol for `Rows`: parse one row starting at `pos` and return a
# `Row2` together with the next state `(pos, len, row + 1)`, or `nothing` once
# `pos` has passed `len` or `row` has passed `r.limit`.
#
# NOTE(review): this body appears to contain both the pre-change and post-change
# lines of a diff: two termination checks, two `parserow` calls, and two
# `return Row2...` statements. As written, the first `return` ends the function,
# so everything from the second `parserow` call onward is unreachable. Confirm
# which set of lines is meant to remain before relying on the comments below.
@inline function Base.iterate(r::Rows{transpose, O, O2, IO, T, V}, (pos, len, row)=(r.datapos, r.len, 1)) where {transpose, O, O2, IO, T, V}
# stop when the position is past `len` or the row limit has been exceeded
(pos > len || row > r.limit) && return nothing
pos > len && return nothing
# variant 1: parse into `tapes` (reused or freshly allocated) and hand the tapes to Row2
tapes = r.reusebuffer ? r.tapes : allocate(1, r.cols, r.types, r.flags)
pos = parserow(1, Val(transpose), r.cols, EMPTY_TYPEMAP, tapes, r.datapos, r.buf, pos, len, r.positions, 0.0, EMPTY_REFS, 1, r.datarow + row - 2, r.types, r.flags, false, r.options, r.coloptions, r.customtypes)
return Row2(r.names, r.finaltypes, r.columnmap, r.types, r.lookup, tapes, r.buf, r.e, r.options, r.coloptions), (pos, len, row + 1)
# variant 2: always parse into `r.tapes`, passing the custom-types parameter `T` directly
pos = parserow(1, Val(transpose), r.cols, EMPTY_TYPEMAP, r.tapes, r.datapos, r.buf, pos, len, r.positions, 0.0, EMPTY_REFS, 1, r.datarow + row - 2, r.types, r.flags, false, r.options, r.coloptions, T)
cols = r.cols
# reuse `r.values` across iterations when `reusebuffer` is set, otherwise allocate a fresh vector
values = r.reusebuffer ? r.values : Vector{V}(undef, cols)
tapes = r.tapes
# copy the first element of every column's tape into `values`; every branch does the
# same store — presumably the `isa` chain exists so each store sees a concretely
# typed tape; TODO confirm
for i = 1:cols
@inbounds tape = tapes[i]
if tape isa Vector{PosLen}
@inbounds values[i] = tape[1]
elseif tape isa SVec{Int64}
@inbounds values[i] = tape[1]
elseif tape isa SVec{Float64}
@inbounds values[i] = tape[1]
elseif tape isa SVec2{String}
@inbounds values[i] = tape[1]
elseif tape isa SVec{Date}
@inbounds values[i] = tape[1]
elseif tape isa SVec{DateTime}
@inbounds values[i] = tape[1]
elseif tape isa SVec{Time}
@inbounds values[i] = tape[1]
elseif tape isa Vector{Union{Missing, Bool}}
@inbounds values[i] = tape[1]
elseif tape isa Vector{UInt32}
@inbounds values[i] = tape[1]
elseif T !== Tuple{}
# custom types were supplied: let `setcustom!` match the tape against them
setcustom!(T, values, tapes, i)
else
error("bad array type: $(typeof(tape))")
end
end
return Row2{O, O2, V}(r.names, r.finaltypes, r.columnmap, r.types, r.lookup, values, r.buf, r.e, r.options, r.coloptions), (pos, len, row + 1)
end

struct Row2{O} <: Tables.AbstractRow
struct Row2{O, O2, V} <: Tables.AbstractRow
names::Vector{Symbol}
finaltypes::Vector{Type}
columnmap::Vector{Int}
types::Vector{Type}
lookup::Dict{Symbol, Int}
tapes::Vector{AbstractVector}
values::Vector{V}
buf::Vector{UInt8}
e::UInt8
options::O
coloptions::Union{Nothing, Vector{Parsers.Options}}
coloptions::O2
end

# Field accessors for `Row2`. Each one reads its field with `getfield` instead of
# dot syntax — presumably because property access on a `Tables.AbstractRow` is
# used for column lookup; TODO confirm.
# NOTE(review): `gettapes` and `getvalues` are both listed; they look like the
# before/after of a renamed field — confirm `Row2` still has a `tapes` field.
getnames(r::Row2) = getfield(r, :names)
getfinaltypes(r::Row2) = getfield(r, :finaltypes)
getcolumnmap(r::Row2) = getfield(r, :columnmap)
gettypes(r::Row2) = getfield(r, :types)
getlookup(r::Row2) = getfield(r, :lookup)
gettapes(r::Row2) = getfield(r, :tapes)
getvalues(r::Row2) = getfield(r, :values)
getbuf(r::Row2) = getfield(r, :buf)
gete(r::Row2) = getfield(r, :e)
getoptions(r::Row2) = getfield(r, :options)
Expand All @@ -226,14 +286,14 @@ end
Base.@propagate_inbounds function Tables.getcolumn(r::Row2, ::Type{T}, i::Int, nm::Symbol) where {T}
@boundscheck checkbounds(r, i)
j = getcolumnmap(r)[i]
@inbounds x = gettapes(r)[j][1]
@inbounds x = getvalues(r)[j]
return x
end

Base.@propagate_inbounds function Tables.getcolumn(r::Row2, ::Union{Type{Union{Missing, String}}, Type{String}}, i::Int, nm::Symbol)
@boundscheck checkbounds(r, i)
j = getcolumnmap(r)[i]
@inbounds poslen = gettapes(r)[j][1]
@inbounds poslen = getvalues(r)[j]
if poslen isa Missing
return missing
elseif poslen isa String
Expand All @@ -250,7 +310,7 @@ Base.@propagate_inbounds function Parsers.parse(::Type{T}, r::Row2, i::Int) wher
j = getcolumnmap(r)[i]
type = gettypes(r)[j]
(type == String || type == Union{String, Missing}) || stringsonly()
@inbounds poslen = gettapes(r)[j][1]
@inbounds poslen = getvalues(r)[j]
missingvalue(poslen) && return missing
pos = getpos(poslen)
colopts = getcoloptions(r)
Expand All @@ -264,7 +324,7 @@ Base.@propagate_inbounds function detect(r::Row2, i::Int)
j = getcolumnmap(r)[i]
T = gettypes(r)[j]
(T == String || T == Union{String, Missing}) || stringsonly()
@inbounds offlen = gettapes(r)[j][1]
@inbounds offlen = getvalues(r)[j]
missingvalue(offlen) && return missing
pos = getpos(offlen)
colopts = getcoloptions(r)
Expand Down
2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ end
const SmallIntegers = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32}

# Build one tape per column: returns an `AbstractVector[...]` with `ncols` entries,
# each produced by `allocate(T, rowsguess)`. `T` is `PosLen` when the column's flag
# has `lazystrings` set and its type passes the string test, otherwise `types[i]`.
#
# NOTE(review): there are two consecutive `return` statements; only the first one
# executes. They differ in the string test: the first uses `types[i] >: String`
# (any supertype of `String`), the second matches exactly `String` or
# `Union{String, Missing}`. This looks like the old and new lines of a diff shown
# together — confirm which one is intended to remain.
function allocate(rowsguess, ncols, types, flags)
return AbstractVector[allocate(lazystrings(flags[i]) && types[i] >: String ? PosLen : types[i], rowsguess) for i = 1:ncols]
return AbstractVector[allocate(lazystrings(flags[i]) && (types[i] === String || types[i] === Union{String, Missing}) ? PosLen : types[i], rowsguess) for i = 1:ncols]
end

allocate(::Type{Union{}}, len) = MissingVector(len)
Expand Down