Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JSON.lower for a better serialization API #151

Merged
merged 8 commits into from
Aug 16, 2016
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,11 @@ provide a desired ordering. For example, if you `import DataStructures`
package](https://github.com/JuliaLang/DataStructures.jl) is
installed), you can pass `dicttype=DataStructures.OrderedDict` to
maintain the insertion order of the items in the object.

```julia
JSON.lower(p::Point2D) = [x.x, x.y]
```

Define a custom serialization rule for a particular data type. Must return a
value that can be directly serialized; see help for more details. Do **not**
extend `JSON._print`; that is a clunky API and is now deprecated.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe some nicer words could be chosen here?

Please note that JSON._print is now deprecated in favor of JSON.lower.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. I do apologize for being so hostile to JSON._print; it was the only option, and this package's documentation is quite sparse, so from a consumer's viewpoint it's a very reasonable solution.

169 changes: 107 additions & 62 deletions src/JSON.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,75 @@ using Compat
export json # returns a compact (or indented) JSON representation as a string

include("Parser.jl")
include("bytes.jl")

import .Parser.parse

# These are temporary ways to bypass excess memory allocation
# They can be removed once types define their own serialization behaviour again
"Internal JSON.jl implementation detail; do not depend on this type."
immutable AssociativeWrapper{T} <: Associative{Symbol, Any}
wrapped::T
fns::Array{Symbol, 1}
end
AssociativeWrapper(x) = AssociativeWrapper(x, @compat fieldnames(x))

typealias JSONPrimitive @compat(Union{
Associative, Tuple, AbstractArray, AbstractString, Integer,
AbstractFloat, Bool, Void})

Base.getindex(w::AssociativeWrapper, s::Symbol) = getfield(w.wrapped, s)
Base.keys(w::AssociativeWrapper) = w.fns
Base.length(w::AssociativeWrapper) = length(w.fns)

"""
Return a value of a JSON-encodable primitive type that `x` should be lowered
into before encoding as JSON. Supported types are: `Associative` to JSON
objects, `Tuple` and `AbstractVector` to JSON arrays, `AbstractArray` to nested
JSON arrays, `AbstractString` to JSON string, `Integer` and `AbstractFloat` to
JSON number, `Bool` to JSON boolean, and `Void` to JSON null.

Extensions of this method should preserve the property that the return value is
one of the aforementioned types. If first lowering to some intermediate type is
required, then extensions should call `lower` before returning a value.

Note that the return value need not be *recursively* lowered—this function may
for instance return an `AbstractArray{Any, 1}` whose elements are not JSON
primitives.
"""
lower(a) = AssociativeWrapper(a)
lower(a::JSONPrimitive) = a

if isdefined(Base, :Dates)
lower(s::Base.Dates.TimeType) = string(s)
end

lower(s::Symbol) = string(s)

if VERSION < v"0.5.0-dev+2396"
lower(f::Function) = "function at $(f.fptr)"
end

lower(d::DataType) = string(d)
lower(m::Module) = throw(ArgumentError("cannot serialize Module $m as JSON"))

const INDENT=true
const NOINDENT=false
const unescaped = Bool[isprint(c) && !iscntrl(c) && !(c in ['\\','"']) for c in '\x00':'\x7F']
const REVERSE_ESCAPES = Dict(map(reverse, ESCAPES))
const escaped = Array(Vector{UInt8}, 256)
for c in 0x00:0xFF
escaped[c + 1] = if c == SOLIDUS
[SOLIDUS] # don't escape this one
elseif c ≥ 0x80
[c] # UTF-8 character copied verbatim
elseif haskey(REVERSE_ESCAPES, c)
[BACKSLASH, REVERSE_ESCAPES[c]]
elseif iscntrl(@compat Char(c)) || !isprint(@compat Char(c))
UInt8[BACKSLASH, LATIN_U, hex(c, 4)...]
else
[c]
end
end

type State{I}
indentstep::Int
Expand Down Expand Up @@ -68,116 +131,88 @@ function end_object(io::IO, state::State{NOINDENT}, is_dict::Bool)
end

function print_escaped(io::IO, s::AbstractString)
for c in s
c <= '\x7f' ? (unescaped[@compat(Int(c))+1] ? Base.print(io, c) :
c == '\\' ? Base.print(io, "\\\\") :
c == '"' ? Base.print(io, "\\\"") :
8 ≤ @compat(UInt32(c)) ≤ 10 ? Base.print(io, '\\', "btn"[@compat(Int(c))-7]) :
c == '\f' ? Base.print(io, "\\f") :
c == '\r' ? Base.print(io, "\\r") :
Base.print(io, "\\u", hex(c, 4))) :
@inbounds for c in s
c <= '\x7f' ? Base.write(io, escaped[@compat UInt8(c) + 0x01]) :
Base.print(io, c) #JSON is UTF8 encoded
end
end

function _print(io::IO, state::State, s::AbstractString)
Base.print(io, '"')
JSON.print_escaped(io, s)
Base.print(io, '"')
end

if isdefined(Base, :Dates)
function _print(io::IO, state::State, s::Base.Dates.TimeType)
_print(io, state, string(s))
function print_escaped(io::IO, s::Compat.UTF8String)
@inbounds for c in s.data
Base.write(io, escaped[c + 0x01])
end
end

function _print(io::IO, state::State, s::Symbol)
_print(io, state, string(s))
function _writejson(io::IO, state::State, s::AbstractString)
Base.print(io, '"')
JSON.print_escaped(io, s)
Base.print(io, '"')
end

@compat function _print(io::IO, state::State, s::Union{Integer, AbstractFloat})
@compat function _writejson(io::IO, state::State, s::Union{Integer, AbstractFloat})
if isnan(s) || isinf(s)
Base.print(io, "null")
else
Base.print(io, s)
end
end

@compat function _print(io::IO, state::State, n::Void)
@compat function _writejson(io::IO, state::State, n::Void)
Base.print(io, "null")
end

function _print(io::IO, state::State, a::Associative)
function _writejson(io::IO, state::State, a::Associative)
if length(a) == 0
Base.print(io, "{}")
return
end
start_object(io, state, true)
first = true
for (key, value) in a
for key in keys(a)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed this, and I'm curious about this change. Is iterating with a tuple more expensive than computing a hash?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarking shows that the tuple is cheaper (though it allocates three times the memory), but the cost of hashing is peanuts compared to the cost of IO in this loop, so the difference isn't detectable. I switched this because for AssociativeWrapper, the "hash" is just a fast getfield call; forming the tuple still has allocation issues. After #162, it will be possible to define separate serialization for AssociativeWrapper and Dict, so I switched back in that PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, thanks for the clarification! Not a big deal if it doesn't make much of a difference. The larger memory allocation might be misleading, as it's likely that the allocated memory is being released/reused quickly.

first ? (first = false) : Base.print(io, ",", suffix(state))
Base.print(io, prefix(state))
JSON._print(io, state, string(key))
_writejson(io, state, string(key))
Base.print(io, separator(state))
JSON._print(io, state, value)
_writejson(io, state, a[key])
end
end_object(io, state, true)
end

@compat function _print(io::IO, state::State, a::Union{AbstractVector,Tuple})
@compat function _writejson(io::IO, state::State, a::Union{AbstractVector,Tuple})
if length(a) == 0
Base.print(io, "[]")
return
end
start_object(io, state, false)
Base.print(io, prefix(state))
i = start(a)
!done(a,i) && ((x, i) = next(a, i); JSON._print(io, state, x); )
!done(a,i) && ((x, i) = next(a, i); _writejson(io, state, x); )

while !done(a,i)
(x, i) = next(a, i)
Base.print(io, ",")
printsp(io, state)
JSON._print(io, state, x)
_writejson(io, state, x)
end
end_object(io, state, false)
end

function _print(io::IO, state::State, a)
start_object(io, state, true)
range = @compat fieldnames(a)
if length(range) > 0
Base.print(io, prefix(state), "\"", range[1], "\"", separator(state))
JSON._print(io, state, getfield(a, range[1]))

for name in range[2:end]
Base.print(io, ",")
printsp(io, state)
Base.print(io, "\"", name, "\"", separator(state))
JSON._print(io, state, getfield(a, name))
end
function _writejson(io::IO, state::State, a)
# FIXME: This fallback is harming performance substantially.
# Remove this fallback when _print removed.
if applicable(_print, io, state, a)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this call is costly, how about moving it to a global constant?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean to cache the results of the call? I'm a little worried about cache invalidation, but I suppose it's no worse than JuliaLang/julia#265 is.

Copy link
Collaborator Author

@TotalVerb TotalVerb Aug 11, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't able to get this to work.

Caching the results in a Dict only saves a tiny portion of the runtime cost, while using more memory, adding compile-time overhead, and also making the entire thing not thread safe. I think we might have to live with the applicable check for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thanks for checking. One other idea to try would be to put a try-catch around a call to the _print function, temporarily replacing the io parameter with an IOBuffer to capture the output if the call succeeds. But I'm not expecting that to necessarily help.

Copy link
Collaborator Author

@TotalVerb TotalVerb Aug 12, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using try-catch seems to make it even worse. I'll also try a @noinline function. If that does not work, then I'm out of ideas.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just use applicable should be the best solution. It's doing a specialized dict lookup anyway. try-catch is also very expensive.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant a noinline function to remove the excess code generated by depwarn. I think that might help some, even if it doesn't remove the applicable cost.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noinline function doesn't do much either. Anyway, the problem will be resolved when the deprecation warning is removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Should be good to merge.

Base.depwarn(
"Overloads to `_print` are deprecated; extend `lower` instead.",
:_print)
_print(io, state, a)
else
_writejson(io, state, lower(a))
end
end_object(io, state, true)
end

if VERSION < v"0.5.0-dev+2396"
function _print(io::IO, state::State, f::Function)
Base.print(io, "\"function at ", f.fptr, "\"")
end
end

function _print(io::IO, state::State, d::DataType)
Base.print(io, "\"", d, "\"")
end

function _print(::IO, ::State, m::Module)
throw(ArgumentError("cannot serialize Module $m as JSON"))
end

# Note: Arrays are printed in COLUMN MAJOR format.
# i.e. json([1 2 3; 4 5 6]) == "[[1,4],[2,5],[3,6]]"
function _print{T, N}(io::IO, state::State, a::AbstractArray{T, N})
function _writejson{T, N}(io::IO, state::State, a::AbstractArray{T, N})
lengthN = size(a, N)
if lengthN > 0
start_object(io, state, false)
Expand All @@ -187,28 +222,38 @@ function _print{T, N}(io::IO, state::State, a::AbstractArray{T, N})
newdims = ntuple(i -> 1:size(a, i), N - 1)
end
Base.print(io, prefix(state))
JSON._print(io, state, Compat.view(a, newdims..., 1))
_writejson(io, state, Compat.view(a, newdims..., 1))

for j in 2:lengthN
Base.print(io, ",")
printsp(io, state)
JSON._print(io, state, Compat.view(a, newdims..., j))
_writejson(io, state, Compat.view(a, newdims..., j))
end
end_object(io, state, false)
else
Base.print(io, "[]")
end
end

# this is _print() instead of _print because we need to support v0.3
# FIXME: drop the parentheses when v0.3 support dropped
"Deprecated way to overload JSON printing behaviour. Use `lower` instead."
function _print(io::IO, s::State, a::JSONPrimitive)
Base.depwarn(
"Do not call internal function `JSON._print`; use `JSON.print`",
:_print)
_writejson(io, s, a)
end

function print(io::IO, a, indent=0)
JSON._print(io, State(indent), a)
_writejson(io, State(indent), a)
if indent > 0
Base.print(io, "\n")
end
end

function print(a, indent=0)
JSON._print(STDOUT, State(indent), a)
_writejson(STDOUT, State(indent), a)
if indent > 0
println()
end
Expand Down
17 changes: 17 additions & 0 deletions test/lowering.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
if isdefined(Base, :Dates)
@test JSON.json(Date(2016, 8, 3)) == "\"2016-08-03\""
end

@test JSON.json(:x) == "\"x\""
@test_throws ArgumentError JSON.json(Base)

immutable Type151{T}
x::T
end

@test JSON.json(Type151) == "\"Type151{T}\""

JSON.lower{T}(v::Type151{T}) = @compat Dict(:type => T, :value => v.x)
@test JSON.parse(JSON.json(Type151(1.0))) == @compat Dict(
"type" => "Float64",
"value" => 1.0)
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,6 @@ end

# Check that printing to the default STDOUT doesn't fail
JSON.print(["JSON.jl tests pass!"],1)

# Lowering tests
include("lowering.jl")