Skip to content

Commit

Permalink
speed up IO of arrays of mutable yet fixed length objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Shashi Gowda committed Oct 4, 2017
1 parent 28c1c04 commit a763f2b
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 6 deletions.
15 changes: 9 additions & 6 deletions src/MemPool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ end

# Deserialize the contents of the file named by `f.file`, then unwrap the
# resulting object with the matching `unwrap_payload` method.
function unwrap_payload(f::FileRef)
    payload = open(deserialize, f.file)
    return unwrap_payload(payload)
end

include("io.jl")
include("datastore.jl")

"""
`approx_size(d)`
Expand All @@ -50,10 +53,13 @@ function approx_size(d)
end

function approx_size{T}(d::Array{T})
if isbits(T)
sizeof(d)
isbits(T) && return sizeof(d)

fl = fixedlength(T)
if fl > 0
return length(d) * fl
else
Base.summarysize(d)
return Base.summarysize(d)
end
end

Expand All @@ -63,9 +69,6 @@ function approx_size(xs::Array{String})
sum(map(sizeof, xs)) + 4 * length(xs)
end

include("io.jl")
include("datastore.jl")

# Module initializer: bind the global `session` to "sess-" followed by a
# random 5-character string.
function __init__()
    global session = "sess-" * randstring(5)
end

end # module
85 changes: 85 additions & 0 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,18 @@ function mmwrite(io::AbstractSerializer, arr::A) where A<:Union{Array,BitArray}
if isbits(T)
serialize(io, size(arr))
write(io.io, arr)
return
elseif T<:Union{} || T<:Nullable{Union{}}
serialize(io, size(arr))
return
end

fl = fixedlength(T)
if fl > 0
serialize(io, size(arr))
for x in arr
fast_write(io.io, x)
end
else
serialize(io, arr)
end
Expand All @@ -43,6 +53,16 @@ function mmread(::Type{A}, io, mmap) where A<:Union{Array,BitArray}
elseif T<:Union{} || T<:Nullable{Union{}}
sz = deserialize(io)
return Array{T}(sz)
end

fl = fixedlength(T)
if fl > 0
sz = deserialize(io)
arr = A(sz...)
@inbounds for i in eachindex(arr)
arr[i] = fast_read(io.io, T)::T
end
return arr
else
return deserialize(io) # slow!!
end
Expand Down Expand Up @@ -85,3 +105,68 @@ function mmread{N}(::Type{Array{String,N}}, io, mmap)
end
ys
end


## Optimized fixed length IO
## E.g. this is very good for `StaticArrays.MVector`s

"""
    fixedlength(t::Type, cycles=ObjectIdDict())

Return the number of bytes needed to store the fields of a value of type `t`
when every (nested) field is itself of fixed length, or `-1` when `t` cannot
be treated as fixed length.

`cycles` records the types already visited on the current path through the
field tree; meeting a type again means it is self-referential, which yields `-1`.

Types that are not a `DataType` (`UnionAll`s, `Union`s, ...) have no field
layout of their own and always yield `-1`.
"""
function fixedlength(t::Type, cycles=ObjectIdDict())
    if isbits(t)
        return sizeof(t)
    elseif !isa(t, DataType)
        # Only a `DataType` can be inspected with `nfields`/`fieldtype`.
        # Rejecting everything else keeps e.g. `Union{Int,Float64}` from
        # throwing below instead of falling back to the generic code path.
        return -1
    end

    if haskey(cycles, t)
        return -1
    end
    cycles[t] = nothing

    nf = nfields(t)
    # No fields: e.g. abstract type / array type
    nf == 0 && return -1

    total = 0
    for i in 1:nf
        # Each field gets its own copy so that sibling fields sharing a type
        # are not mistaken for a cycle.
        len = fixedlength(fieldtype(t, i), copy(cycles))
        len < 0 && return -1
        total += len
    end
    return total
end

# Strings and pointers are never treated as fixed length.
# The `cycles` argument is accepted (and ignored) so that the two-argument
# recursive calls made by the generic `fixedlength` method for field types
# also dispatch to these exclusions, not only direct one-argument calls.
fixedlength(t::Type{<:String}, cycles=nothing) = -1
fixedlength(t::Type{<:Ptr}, cycles=nothing) = -1
# `Union{}` is a subtype of both `String` and `Ptr`; this method removes the
# dispatch ambiguity between the two definitions above.
fixedlength(t::Type{Union{}}, cycles=nothing) = -1

"""
    gen_writer(T, expr)

Build the expression that writes the value produced by `expr` (of type `T`)
to a variable named `io`.

Tuples are written in a single `write` through a `Ref`; types with fields are
written field by field (recursively); remaining `isbits` types are written
directly. Any other type raises an error.
"""
function gen_writer(::Type{T}, expr) where T
    @assert fixedlength(T) >= 0 "gen_writer must be called for fixed length eltypes"

    T <: Tuple && return :(write(io, Ref{$T}($expr)))

    if length(T.types) > 0
        # One write statement per field, in declaration order
        field_writes = map(1:nfields(T)) do k
            gen_writer(fieldtype(T, k), :(getfield($expr, $k)))
        end
        return Expr(:block, field_writes...)
    end

    isbits(T) || error("Don't know how to serialize $T")
    return :(write(io, $expr))
end

"""
    gen_reader(T)

Build the expression that reads a value of type `T` from a variable named `io`.

Tuples are read in a single `read` into a `Ref`; types with fields are rebuilt
with `jl_new_struct` from their recursively read fields; remaining `isbits`
types are read directly. Any other type raises an error.
"""
function gen_reader(::Type{T}) where T
    @assert fixedlength(T) >= 0 "gen_reader must be called for fixed length eltypes"

    T <: Tuple && return :(read(io, Ref{$T}())[])

    if length(T.types) > 0
        # One read expression per field, in declaration order
        field_reads = [gen_reader(fieldtype(T, k)) for k in 1:nfields(T)]
        return :(ccall(:jl_new_struct, Any, (Any,Any...), $T, $(field_reads...)))
    end

    isbits(T) || error("Don't know how to deserialize $T")
    return :(read(io, $T))
end

# Write `x` to `io` with the code that `gen_writer` generates for the type of `x`.
@generated function fast_write(io, x)
    return gen_writer(x, :x)
end

# Read a value of type `T` from `io` with the code that `gen_reader` generates for `T`.
@generated function fast_read(io, ::Type{T}) where T
    return gen_reader(T)
end
11 changes: 11 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ end
@test length(y) == 10
end

using StaticArrays
# Round-trip and size-estimate check for a vector of 100 `MVector`s holding
# 75 random numbers each.
@testset "StaticArrays" begin
x = [@MVector(rand(75)) for i=1:100]
io = IOBuffer()
mmwrite(SerializationState(io), x)
# NOTE(review): `alloc` is measured on a second write (from the start of the
# buffer) but never asserted on; presumably meant to guard the allocation
# count of the fast path. Confirm or remove.
alloc = @allocated mmwrite(SerializationState(seekstart(io)), x)

# Rewind before reading back what was written
@test deserialize(seekstart(io)) == x
# 75 elements per vector * 100 vectors * 8 bytes per element
@test MemPool.approx_size(x) == 75*100*8
end

# Round-trip a vector of random strings with lengths between 1 and 10.
@testset "Array{String}" begin
    # Iterate over the range `1:4`; `for i=4` would iterate over the scalar 4
    # and yield a single-element, zero-dimensional array.
    roundtrip([randstring(rand(1:10)) for i in 1:4])
end
Expand Down

4 comments on commit a763f2b

@shashi
Copy link
Collaborator

@shashi shashi commented on a763f2b Oct 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeffBezanson would it be fine to adapt this optimization for Base?

It works for mutable structs whose fields are either immutable isbits values or other mutable structs of the same kind (e.g. MVector), and it is 10x faster than the default.

@JeffBezanson
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do something related in JuliaLang/julia#14678. The problem is that it skips calling custom serialize methods for isbits fields. But I think we could decide we don't care about that. Or, the @generated function could generate unrolled calls to serialize instead of write, which would still help a bit.

@shashi
Copy link
Collaborator

@shashi shashi commented on a763f2b Oct 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I think we could decide we don't care about that.

👍 we could document that custom serializers should override Array{T} too.

related in JuliaLang/julia#14678.

This doesn't mess with types containing Ptr...

@JeffBezanson
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But a special case was needed for Ptr. If other isbits types have serialize methods, they won't be called.

Please sign in to comment.