Skip to content

Commit

Permalink
more verbose and accurate testing
Browse files Browse the repository at this point in the history
  • Loading branch information
simeonschaub committed Dec 21, 2020
1 parent fc6c8e9 commit 5df14d2
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 59 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ ZygoteRules = "0.2"
julia = "1.3"

[extras]
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["FiniteDifferences", "Random", "Statistics", "Test", "Zygote"]
105 changes: 47 additions & 58 deletions test/zygote.jl
Original file line number Diff line number Diff line change
@@ -1,111 +1,100 @@
using Zygote, NNlib
using Random
using NNlib: conv, ∇conv_data, depthwiseconv, batched_mul
using FiniteDifferences: grad, central_fdm

# Numerical gradient of `f` with respect to each array in `xs`, computed
# elementwise by central differences.
#
# Each element is perturbed by ±δ/2, `f` is re-evaluated, and the centered
# difference quotient is stored. The inputs are mutated in place during the
# sweep but every element is restored before moving on, so `xs` are unchanged
# on return. `f` must return a scalar.
#
# Returns `zero.(xs)`-shaped gradient arrays, one per input.
function ngradient(f, xs::AbstractArray...)
    grads = zero.(xs)
    for (x, Δ) in zip(xs, grads), i in eachindex(x)
        # Step size scaled to the element type's precision; for Float64 inputs
        # this equals the previous hard-coded sqrt(eps()).
        δ = sqrt(eps(float(eltype(x))))
        tmp = x[i]
        x[i] = tmp - δ/2
        y1 = f(xs...)
        x[i] = tmp + δ/2
        y2 = f(xs...)
        x[i] = tmp  # restore the original element
        Δ[i] = (y2 - y1) / δ
    end
    return grads
end

# Compare Zygote's gradient of `f` at `xs` against a 5th-order central finite
# difference from FiniteDifferences.jl, `@test`-ing elementwise approximate
# equality per input gradient. Returns `nothing`; failures are reported through
# the enclosing test set.
function gradcheck(f, xs...; rtol = 1e-6, atol = 1e-6)
    grad_zygote = gradient(f, xs...)
    grad_finite_difference = grad(central_fdm(5, 1), f, xs...)
    for (g_zygote, g_fd) in zip(grad_zygote, grad_finite_difference)
        @test isapprox(g_zygote, g_fd; rtol = rtol, atol = atol)
    end
end

# Gradient-check `f` through the scalar-valued wrapper `sum(sin.(f(xs...)))`,
# which turns any array-valued `f` into a scalar loss with nonzero sensitivity
# to each output element. Keyword args are forwarded to `gradcheck`.
gradtest(f, xs::AbstractArray...; kw...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...; kw...)
# Convenience method: build random Float64 arrays of the given sizes as inputs.
gradtest(f, dims...; kw...) = gradtest(f, rand.(Float64, dims)...; kw...)

# Activation functions composed with an affine layer. `gradtest`/`gradcheck`
# run `@test` internally and return `nothing`, so their calls are not wrapped.
gradtest((x, W, b) -> relu.(W*x .+ b), 5, (2,5), 2)
gradtest((x, W, b) -> relu.(W*x .+ b), (5,3), (2,5), 2)
gradtest((x, W, b) -> selu.(W*x .+ b), 5, (2,5), 2)
gradtest((x, W, b) -> selu.(W*x .+ b), (5,3), (2,5), 2)
gradtest((x, W, b) -> elu.(W*x .+ b, 2), 5, (2,5), 2)
gradtest((x, W, b) -> elu.(W*x .+ b, 2), (5,3), (2,5), 2)

# tests for https://github.com/FluxML/Zygote.jl/issues/758
# These exact-equality checks evaluate to a Bool, so they must stay inside
# `@test` — a bare `gradient(...) == (...)` at top level is a no-op.
@test gradient(xs -> sum(selu.(xs)), [1_000, 10_000]) == ([1.0507009873554805, 1.0507009873554805],)
@test gradient(x -> selu(x), 1_000) == (1.0507009873554805,)
@test gradient(xs -> sum(elu.(xs, 2)), [1_000, 10_000]) == ([1., 1.],)
@test gradient(x -> elu(x, 2), 1_000) == (1.,)
@test gradient(x -> elu(x, 2), -1) == (2*exp(-1),)
gradcheck(x -> sum(selu.(x)), [100., 1_000.])
gradcheck(x -> sum(elu.(x, 3.5)), [100., 1_000.])
gradcheck(x -> sum(elu.(x, 3.5)), [1_000., 10_000.]) # for elu the tests are passing but for selu not, interesting
# numerical instability even for the linear part of such function, see:
# julia> ngradient(x->sum(selu.(x)),[1_000., 10_000.])
# ([1.0506591796875, 1.0506591796875],)
# julia> gradient(x->sum(selu.(x)),[1_000., 10_000.])
# ([1.0507009873554805, 1.0507009873554805],)
@test_broken gradcheck(x->sum(selu.(x)),[1_000., 10_000.])

# Sigmoid / log-sigmoid through an affine layer.
gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2)
gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2)
gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2)
gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2)

# softmax / logsoftmax, scaled elementwise by 1:3 so each row of the output
# contributes differently to the scalar loss.
gradtest(x -> softmax(x).*(1:3), 3)
gradtest(x -> softmax(x).*(1:3), (3,5))
gradtest(x -> softmax(x, dims=2).*(1:3), (3,5))
gradtest(x -> logsoftmax(x).*(1:3), 3)
gradtest(x -> logsoftmax(x).*(1:3), (3,5))
gradtest(x -> logsoftmax(x, dims=2).*(1:3), (3,5))

@testset "conv: spatial_rank=$spatial_rank" for spatial_rank in (1, 2, 3)
    x = rand(repeat([5], spatial_rank)..., 3, 2)
    w = rand(repeat([3], spatial_rank)..., 3, 3)
    cdims = DenseConvDims(x, w)
    gradtest((x, w) -> conv(x, w, cdims), x, w)
    gradtest((x, w) -> sum(conv(x, w, cdims)), x, w) # https://github.com/FluxML/Flux.jl/issues/1055

    y = conv(x, w, cdims)
    gradtest((y, w) -> ∇conv_data(y, w, cdims), y, w)
    if spatial_rank == 3
        # known failure for 3-D data gradients — keep tracked, not asserted
        @test_broken gradtest((y, w) -> sum(∇conv_data(y, w, cdims)), y, w)
    else
        gradtest((y, w) -> sum(∇conv_data(y, w, cdims)), y, w)
    end

    dcdims = DepthwiseConvDims(x, w)
    gradtest((x, w) -> depthwiseconv(x, w, dcdims), x, w)

    y = depthwiseconv(x, w, dcdims)
    gradtest((y, w) -> ∇depthwiseconv_data(y, w, dcdims), y, w)
    if spatial_rank == 3
        # same known 3-D failure as the dense case above
        @test_broken gradtest((y, w) -> sum(∇depthwiseconv_data(y, w, dcdims)), y, w)
    else
        gradtest((y, w) -> sum(∇depthwiseconv_data(y, w, dcdims)), y, w)
    end
end

@testset "pooling: spatial_rank=$spatial_rank" for spatial_rank in (1, 2)
    x = rand(repeat([10], spatial_rank)..., 3, 2)
    pdims = PoolDims(x, 2)
    gradtest(x -> maxpool(x, pdims), x)
    gradtest(x -> meanpool(x, pdims), x)
    gradtest(x -> sum(maxpool(x, pdims)), x)
    gradtest(x -> sum(meanpool(x, pdims)), x)

    #https://github.com/FluxML/NNlib.jl/issues/188
    k = ntuple(_ -> 2, spatial_rank) # Kernel size of pool in ntuple format
    gradtest(x -> maxpool(x, k), x)
    gradtest(x -> meanpool(x, k), x)
    gradtest(x -> sum(maxpool(x, k)), x)
    gradtest(x -> sum(meanpool(x, k)), x)
end

@testset "batched matrix multiplication" begin
    # Fixed seed keeps the random batched inputs reproducible across runs.
    rng, M, P, Q = MersenneTwister(123456), 13, 7, 11
    B = 3
    gradtest(batched_mul, randn(rng, M, P, B), randn(rng, P, Q, B))
end

0 comments on commit 5df14d2

Please sign in to comment.