Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions profiling/reflect.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using CUDA
using Flux
using BenchmarkTools
using Random

using HighDimPDE
if CUDA.functional()
CUDA.allowscalar(false)
_device = Flux.gpu
Expand All @@ -10,6 +12,21 @@ end
# testing reflection on batchsize
d = 100
batch_size = 10000
#
for d in [2^i for i in 1:17]
Random.seed!(1)
X0 = fill(0.0f0,d)
X1 = X0 + randn(d)
println("\n\nd = $d\n")
if d <= 16384
args = copy.((X0, X1, -1, 1))
@time HighDimPDE._reflect(args...)
args = copy.((X0, X1, -1, 1))
@time HighDimPDE._reflect_GPU(args...)
end
args = copy.((X0, X1, -1, 1))
@time HighDimPDE._reflect_outs(args...)
end
y0 = CUDA.zeros(d,batch_size)
y1 = CUDA.randn(size(y0)...)
@btime _reflect_GPU2($y0,$y1,-1f0,1f0,_device)
Expand Down
94 changes: 94 additions & 0 deletions src/reflect.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,97 @@ function _reflect_GPU(a, b, s::Real, e::Real)
end
return b
end

function _out_indices(b, s::R, e::R) where {R<:Real}
out1 = findall(b .< s)
out2 = findall(b .> e)
out = vcat(out1, out2)
return out1, out2, out
end


"""
_rtemp_lower!(rtemp, a, b, s, out_lower)
Update slice of `rtemp` corresponding to the lower boundary `s`.
"""
function _rtemp_lower!(rtemp, a, b, s, out_lower)
for i in 1:length(out_lower)
idx = out_lower[i]
rtemp[i] = @. (s - a[idx]) / (b[idx] - a[idx])
end
end

"""
_rtemp_lower!(rtemp, a, b, e, out_upper, offset)
Update slice of `rtemp` corresponding to the upper boundary `e`.
NOTE: An offset needs to be provided, corresponding to the number of dimensions where `b` is below the lower boundary.
(SEE `rtemp_lower!`).
"""
function _rtemp_upper!(rtemp, a, b, e, out_upper, offset)
for i in 1:length(out_upper)
idx = out_upper[i]
rtemp[i + offset] = @. (e - a[idx]) / (b[idx] - a[idx])
end
end


"""
_discard_reflected_out_index!(out, out1, out2, rmin_idx)
Checks if `b[n_idx]` (which corresponds to `out[rmin_idx]`) is within the specified boundary `[s, e]` after reflection along dimension `n_idx`.
The corresponding `out`-indices are removed before iterating the reflection.
"""
function _discard_reflected_out_index!(out, out1, out2, rmin_idx)
offset = length(out1)
if rmin_idx <= offset
deleteat!(out1, rmin_idx)
deleteat!(out, rmin_idx)
else
deleteat!(out2, rmin_idx - offset)
deleteat!(out, rmin_idx)
end
end


function _swap_boundary_outs!(out1, out2, n_idx, rmin_idx)
offset = length(out1)
# Should occur after reflecting OUT-OF-BOUNDS, where
# dimension `n_idx` is assigned to other `out`-vector
# for next reflection.
if rmin_idx <= offset
deleteat!(out1, rmin_idx)
push!(out2, n_idx)
else
deleteat!(out2, rmin_idx - offset)
push!(out1, n_idx)
end
end

function _reflect_outs(a, b, s::Real, e::Real)
all((a .>= s) .& (a .<= e)) ? nothing : error("a = $a not in hypercube")
size(a) == size(b) ? nothing : error("a not same dim as b")

# Initialize vectors of indices corresponding to dim of trajectories out of bounds
out1, out2, out = _out_indices(b, s, e)
rtemp = Vector{Float32}(undef, length(out))
while length(out) > 0
# TODO: Preallocate rtemp once, then remove elements with deleteat!(rtemp, rmin_idx)
_rtemp_lower!(rtemp, a, b, s, out1)
_rtemp_upper!(rtemp, a, b, e, out2, length(out1))

rmin, rmin_idx = findmin(rtemp)
n_idx = out[rmin_idx]
@. a += (b - a) * rmin
b[n_idx] = -b[n_idx] + 2 * a[n_idx]
if s < b[n_idx] && b[n_idx] < e
# Within boundary after reflection, can reduce number of reflecting dimensions.
_discard_reflected_out_index!(out, out1, out2, rmin_idx)
deleteat!(rtemp, rmin_idx)
else
# Reflected outside boundary, need to move element between out1 and out2
# TODO: Reassign out-indices manually via _swap_boundary_outs!, for now, just recompute out-indices.
# _swap_boundary_outs!(out1, out2, n_idx, rmin_idx)
out1, out2, out = _out_indices(b, s, e)
end
end
return b
end
12 changes: 12 additions & 0 deletions test/reflect.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,15 @@ if CUDA.functional()
end
end
end

@testset "CPU index reflect methods" begin
d = 1000
X0 = fill(0.0f0,d)
X1 = X0 + randn(d)
@testset "test equivalence of index with cpu/gpu" begin
args = (X0, X1, -1, 1)
@test HighDimPDE._reflect(copy.(args)...) ≈ HighDimPDE._reflect_outs(copy.(args)...)
@test HighDimPDE._reflect_GPU(copy.(args)...) ≈ HighDimPDE._reflect_outs(copy.(args)...)
end

end