From eceaf3e47e81c7c9964931eb6e71f819bb9021ce Mon Sep 17 00:00:00 2001
From: Moksh Jain
Date: Tue, 5 Feb 2019 15:45:55 +0530
Subject: [PATCH] fix merge conflicts

---
 src/layers/conv.jl      |  2 +-
 src/layers/recurrent.jl |  8 ++++----
 src/layers/stateless.jl |  6 ++++++
 src/onehot.jl           | 23 ++++++++++++++++++++++-
 test/cuda/cuda.jl       |  7 +++++++
 5 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 2562989584..54817cd6e2 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -13,7 +13,7 @@ Standard convolutional layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl
index 40cd322a0a..4e23e9eec3 100644
--- a/src/layers/recurrent.jl
+++ b/src/layers/recurrent.jl
@@ -84,7 +84,7 @@ end
 
 RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) =
   RNNCell(σ, param(init(out, in)), param(init(out, out)),
-          param(zeros(out)), param(init(out)))
+          param(init(out)), param(zeros(out)))
 
 function (m::RNNCell)(h, x)
   σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b
@@ -122,8 +122,8 @@ end
 
 function LSTMCell(in::Integer, out::Integer;
                   init = glorot_uniform)
-  cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zeros(out*4)),
-                  param(init(out)), param(init(out)))
+  cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(init(out*4)),
+                  param(zeros(out)), param(zeros(out)))
   cell.b.data[gate(out, 2)] .= 1
   return cell
 end
@@ -169,7 +169,7 @@ end
 
 GRUCell(in, out; init = glorot_uniform) =
   GRUCell(param(init(out*3, in)), param(init(out*3, out)),
-          param(zeros(out*3)), param(init(out)))
+          param(init(out*3)), param(zeros(out)))
 
 function (m::GRUCell)(h, x)
   b, o = m.b, size(h, 1)
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 0da3399c05..28722b3070 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -44,8 +44,14 @@ logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ)
 
 Normalises x to mean 0 and standard deviation 1, across the dimensions given by dims. Defaults to normalising over columns.
 """
+<<<<<<< HEAD
 function normalise(x::AbstractArray, dims::Int=1)
   μ′ = mean(x, dims = dims)
   σ′ = std(x, dims = dims, mean = μ′)
+=======
+function normalise(x::AbstractVecOrMat)
+  μ′ = mean(x, dims = 1)
+  σ′ = std(x, dims = 1, mean = μ′, corrected=false)
+>>>>>>> 940b1e6dbfd95ed3f64daf0a75edae713d25e7c1
   return (x .- μ′) ./ σ′
 end
diff --git a/src/onehot.jl b/src/onehot.jl
index cd29f14e9d..c82dce237a 100644
--- a/src/onehot.jl
+++ b/src/onehot.jl
@@ -9,6 +9,8 @@ Base.size(xs::OneHotVector) = (Int64(xs.of),)
 
 Base.getindex(xs::OneHotVector, i::Integer) = i == xs.ix
 
+Base.getindex(xs::OneHotVector, ::Colon) = xs
+
 A::AbstractMatrix * b::OneHotVector = A[:, b.ix]
 
 struct OneHotMatrix{A<:AbstractVector{OneHotVector}} <: AbstractMatrix{Bool}
@@ -22,6 +24,21 @@ Base.getindex(xs::OneHotMatrix, i::Integer, j::Integer) = xs.data[j][i]
 Base.getindex(xs::OneHotMatrix, ::Colon, i::Integer) = xs.data[i]
 Base.getindex(xs::OneHotMatrix, ::Colon, i::AbstractArray) = OneHotMatrix(xs.height, xs.data[i])
 
+Base.getindex(xs::Flux.OneHotMatrix, j::Base.UnitRange, i::Int) = xs.data[i][j]
+
+Base.getindex(xs::OneHotMatrix, ::Colon, ::Colon) = xs
+
+# handle special case for when we want the entire column
+function Base.getindex(xs::Flux.OneHotMatrix{T}, ot::Union{Base.Slice, Base.OneTo}, i::Int) where {T<:AbstractArray}
+  res = similar(xs, size(xs, 1), 1)
+  if length(ot) == size(xs, 1)
+    res = xs[:,i]
+  else
+    res = xs[1:length(ot),i]
+  end
+  res
+end
+
 A::AbstractMatrix * B::OneHotMatrix = A[:, map(x->x.ix, B.data)]
 
 Base.hcat(x::OneHotVector, xs::OneHotVector...) = OneHotMatrix(length(x), [x, xs...])
@@ -54,13 +71,17 @@ end
 onehotbatch(ls, labels, unk...) =
   OneHotMatrix(length(labels), [onehot(l, labels, unk...) for l in ls])
 
+Base.argmax(xs::OneHotVector) = xs.ix
+
 onecold(y::AbstractVector, labels = 1:length(y)) = labels[Base.argmax(y)]
 
 onecold(y::AbstractMatrix, labels...) =
   dropdims(mapslices(y -> onecold(y, labels...), y, dims=1), dims=1)
 
+onecold(y::OneHotMatrix, labels...) = map(x -> onecold(x, labels...), y.data)
+
 function argmax(xs...)
-  Base.depwarn("`argmax(...) is deprecated, use `onecold(...)` instead.", :argmax)
+  Base.depwarn("`argmax(...)` is deprecated, use `onecold(...)` instead.", :argmax)
   return onecold(xs...)
 end
 
diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl
index f7a085031b..43340c746f 100644
--- a/test/cuda/cuda.jl
+++ b/test/cuda/cuda.jl
@@ -38,6 +38,13 @@ Flux.back!(sum(l))
 
 end
 
+@testset "onecold gpu" begin
+  x = zeros(Float32, 10, 3) |> gpu;
+  y = Flux.onehotbatch(ones(3), 1:10) |> gpu;
+  res = Flux.onecold(x) .== Flux.onecold(y)
+  @test res isa CuArray
+end
+
 if CuArrays.libcudnn != nothing
   @info "Testing Flux/CUDNN"
   include("cudnn.jl")