Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reproducible Benchmarks #10

Merged
merged 12 commits into from
Jul 22, 2020
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
/examples/results/
/examples/assets/
/datasets/modelnet/
/benchmarks/*.bson
tmp.jl
92 changes: 92 additions & 0 deletions benchmarks/transforms.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
using Flux3D, BenchmarkTools, BSON

"""
    setup_benchmark_record(names)

Return a `Dict{String, Vector{Float64}}` that maps every entry of `names` to
its own empty vector of timings.
"""
function setup_benchmark_record(names)
    return Dict{String, Vector{Float64}}(name => Float64[] for name in names)
end

"""
    generate_point_cloud(npoints::Int)

Build a `PointCloud` from a `3 × npoints` matrix whose `j`-th column is
`(j, j, j) / npoints`, so the same input is produced on every call.
"""
function generate_point_cloud(npoints::Int)
    # Running sum of ones along the second dimension: column j holds the value j.
    ramp = cumsum(ones(3, npoints); dims = 2)
    return PointCloud(ramp / npoints)
end

"""
    run_benchmarks!(benchmarks, x, npoints, benchmark_func, device)

For each `(transform, name)` pair in `x`, pass the transform and a freshly
generated point cloud of `npoints` points through `device`, benchmark
`benchmark_func(transform, pc)` with `@benchmark`, print the minimum recorded
time and append it to `benchmarks[name]`.
"""
function run_benchmarks!(benchmarks, x, npoints, benchmark_func, device)
    for (op, name) in x
        op_on_device = device(op)
        cloud = device(generate_point_cloud(npoints))
        trial = @benchmark $benchmark_func($op_on_device, $cloud)
        # The smallest sample is scaled by 1e-6 and reported as milliseconds.
        time = minimum(trial.times) * 1.0e-6
        println("$name: $time ms")
        push!(benchmarks[name], time)
    end
end

"""
    realign_point_cloud(npoints)

Return the result of applying `RotatePointCloud(-ROT_MATRIX)` to
`generate_point_cloud(npoints)`. The benchmark script passes this cloud to
`ReAlignPointCloud` as its target.
"""
function realign_point_cloud(npoints)
    source = generate_point_cloud(npoints)
    return RotatePointCloud(-ROT_MATRIX)(source)
end

# Fixed 3×3 matrix shared by every rotation-based transform in this script.
ROT_MATRIX = [
    1.0 2.0 3.0
    0.2 0.5 0.9
    3.0 2.0 1.0
]

# Point-cloud sizes to benchmark: 2^12, 2^14, 2^16, 2^18 and 2^20 points.
npoint_arr = [2^k for k in 12:2:20]

# Keys of the benchmark records: one per transform, plus the composed pipeline.
names = [string(prefix, "PointCloud") for prefix in ("Scale", "Rotate", "Realign", "Normalize")]
push!(names, "Compose")

# Record of CPU timings, keyed by transform name.
cpu_benchmarks = setup_benchmark_record(names)

println("DEVICE: CPU")
for _npoints in npoint_arr
    # Each entry pairs a transform with the key under which its timing is stored.
    # Every transform is built with `inplace=false`.
    arr = [
        (ScalePointCloud(0.5; inplace=false), "ScalePointCloud"),
        (RotatePointCloud(ROT_MATRIX; inplace=false), "RotatePointCloud"),
        (ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false), "RealignPointCloud"),
        (NormalizePointCloud(inplace=false), "NormalizePointCloud"),
        (Compose(
            ScalePointCloud(0.5; inplace=false),
            RotatePointCloud(ROT_MATRIX; inplace=false),
            ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false),
            # Pass `inplace=false` explicitly so the composed pipeline is configured
            # like the standalone Normalize entry and the GPU run of this script.
            NormalizePointCloud(inplace=false)), "Compose")
    ]

    println("Running benchmarks for npoints = $_npoints")
    run_benchmarks!(cpu_benchmarks, arr, _npoints, (op, pc) -> op(pc), cpu)
    println()
end

# Record of GPU timings; its vectors stay empty when CUDA is unavailable.
gpu_benchmarks = setup_benchmark_record(names)

using CUDAapi, CuArrays
if has_cuda()
    println("CUDA is on. Running GPU Benchmarks")
    CuArrays.allowscalar(false)
    println("DEVICE: GPU")
    for _npoints in npoint_arr
        # Build each (transform, record key) pair in the order it is benchmarked.
        scale_entry = (ScalePointCloud(0.5; inplace=false), "ScalePointCloud")
        rotate_entry = (RotatePointCloud(ROT_MATRIX; inplace=false), "RotatePointCloud")
        realign_entry = (
            ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false),
            "RealignPointCloud",
        )
        normalize_entry = (NormalizePointCloud(inplace=false), "NormalizePointCloud")
        compose_entry = (
            Compose(
                ScalePointCloud(0.5; inplace=false),
                RotatePointCloud(ROT_MATRIX; inplace=false),
                ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false),
                NormalizePointCloud(inplace=false),
            ),
            "Compose",
        )
        transforms = [scale_entry, rotate_entry, realign_entry, normalize_entry, compose_entry]

        # Wrap the call in CuArrays.@sync so the timed region includes the GPU work.
        synced_apply = (op, pc) -> (CuArrays.@sync op(pc))

        println("Running benchmarks for npoints = $_npoints")
        run_benchmarks!(gpu_benchmarks, transforms, _npoints, synced_apply, gpu)
        println()
    end
end

# Persist both benchmark records in a BSON file next to this script.
fname = joinpath(@__DIR__, "transform_benchmarks.bson")
BSON.bson(
    fname,
    Dict(:cpu_benchmarks => cpu_benchmarks, :gpu_benchmarks => gpu_benchmarks),
)
@info "Benchmarks have been saved at $fname"
108 changes: 108 additions & 0 deletions benchmarks/transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import torch
import numpy as np
import kaolin as kal
import kaolin.transforms as T
import time

def setup_benchmark_record(names):
    """Return a dict that maps every entry of ``names`` to its own empty list."""
    return {name: [] for name in names}

def generate_point_cloud(npoints, device='cpu'):
    """Build a kaolin ``PointCloud`` whose ``i``-th point is ``(i, i, i) / npoints``.

    The points are a float32 ``[npoints, 3]`` tensor (running sum of ones along
    dim 0, divided by ``npoints``); ``device`` is forwarded to ``PointCloud``.
    """
    ramp = torch.cumsum(torch.ones([npoints, 3], dtype=torch.float32), dim=0)
    return kal.rep.PointCloud(ramp / npoints, device=device)

def realign_point_cloud(npoints, device='cpu'):
    """Return ``generate_point_cloud(npoints, device)`` rotated by ``-ROT_MATRIX``.

    The benchmark script passes the result to ``T.RealignPointCloud`` as its target.
    """
    source = generate_point_cloud(npoints, device)
    negated_rotation = T.RotatePointCloud(-ROT_MATRIX.to(device))
    return negated_rotation(source)

def cpu_time(t, p, n_iters=101):
    """Time ``t(p)`` on the CPU and return the per-call durations in seconds.

    Args:
        t: callable to benchmark.
        p: argument passed to ``t`` on every call.
        n_iters: total number of calls, including one warm-up call.

    Returns:
        A list of ``n_iters - 1`` durations in seconds. The first call is a
        warm-up and is not recorded, so the list is empty when ``n_iters <= 1``.
    """
    benchmark_time = []
    for i in range(n_iters):
        # perf_counter is the stdlib clock intended for measuring short intervals.
        start_time = time.perf_counter()
        t(p)
        end_time = time.perf_counter()

        # Ignore the first iteration (index 0). The previous `i is 1` check
        # compared by identity and dropped the second call instead.
        if i == 0:
            continue
        benchmark_time.append(end_time - start_time)
    return benchmark_time

def gpu_time(t, p, n_iters=101):
    """Time ``t(p)`` with CUDA events and return the per-call durations in seconds.

    Args:
        t: callable to benchmark.
        p: argument passed to ``t`` on every call.
        n_iters: total number of calls, including one warm-up call.

    Returns:
        A list of ``n_iters - 1`` durations in seconds. The first call is a
        warm-up and is not recorded, so the list is empty when ``n_iters <= 1``.
    """
    benchmark_time = []
    for i in range(n_iters):
        # https://pytorch.org/docs/stable/notes/cuda.html#asynchronous-execution
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)
        start_event.record()

        # Work being timed
        t(p)

        end_event.record()
        torch.cuda.synchronize()  # Wait for the events to be recorded!
        elapsed_time_ms = start_event.elapsed_time(end_event)

        # Ignore the first iteration (index 0). The previous `i is 1` check
        # compared by identity and dropped the second call instead.
        if i == 0:
            continue
        benchmark_time.append(elapsed_time_ms / 1000)  # milliseconds -> seconds
    return benchmark_time

def run_benchmarks_(benchmarks, x, npoints, benchmark_func, device):
    """Benchmark every ``(transform, name)`` pair in ``x`` and record the best time.

    For each pair a fresh point cloud of ``npoints`` points is generated on
    ``device`` and ``benchmark_func(transform, input)`` is called. The smallest
    value it returns is converted from seconds to milliseconds, printed, and
    appended to ``benchmarks[name]``.
    """
    for transform, name in x:
        cloud = generate_point_cloud(npoints, device)
        # bug in kaolin (normalize doesn't accept PointCloud): pass the raw points
        target = cloud.points if name == "NormalizePointCloud" else cloud
        samples = benchmark_func(transform, target)
        best_ms = min(samples) * 1.0e3  # converting second to millisecond
        print("{}: {} ms".format(name, best_ms))
        benchmarks[name].append(best_ms)

# Fixed 3x3 matrix shared by the rotation-based transforms in this script.
ROT_MATRIX = torch.tensor([
    [1.0, 2.0, 3.0],
    [0.2, 0.5, 0.9],
    [3.0, 2.0, 1.0],
])

# Point-cloud sizes to benchmark: 2**12, 2**14, 2**16, 2**18 and 2**20 points.
npoint_arr = np.power(2, np.array([12, 14, 16, 18, 20]))

# Keys of the benchmark records: one per transform, plus the composed pipeline.
names = [prefix + "PointCloud" for prefix in ("Scale", "Rotate", "Realign", "Normalize")]
names.append("Compose")

print("DEVICE: CPU")
device = "cpu"
cpu_benchmarks = setup_benchmark_record(names)

for _npoints in npoint_arr:
    # (transform, record key) pairs; every transform is built with inplace=False.
    arr = [(T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), "ScalePointCloud"),
           (T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False), "RotatePointCloud"),
           (T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), "RealignPointCloud"),
           (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"),
           # `inplace=False` belongs to ScalePointCloud, not to the torch.Tensor
           # constructor. The rotation uses the fixed ROT_MATRIX rather than a
           # random matrix so that every run benchmarks the same pipeline.
           (T.Compose([T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False),
                       T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False),
                       T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False),
                       T.NormalizePointCloud(inplace=False)]), "Compose")]
    print("Running benchmarks for npoints = {}".format(_npoints))
    run_benchmarks_(cpu_benchmarks, arr, _npoints, cpu_time, device)
    print()

if torch.cuda.is_available():
    print("CUDA is on. Running GPU Benchmarks")
    print("DEVICE: GPU")
    device = "cuda"
    gpu_benchmarks = setup_benchmark_record(names)
    for _npoints in npoint_arr:
        # (transform, record key) pairs; every transform is built with inplace=False.
        arr = [(T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), "ScalePointCloud"),
               (T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False), "RotatePointCloud"),
               (T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), "RealignPointCloud"),
               (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"),
               # The composed rotation uses the fixed ROT_MATRIX rather than a
               # random matrix so that every run benchmarks the same pipeline.
               (T.Compose([T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False),
                           T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False),
                           T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False),
                           T.NormalizePointCloud(inplace=False)]), "Compose")]
        print("Running benchmarks for npoints = {}".format(_npoints))
        run_benchmarks_(gpu_benchmarks, arr, _npoints, gpu_time, device)
        print()
75 changes: 0 additions & 75 deletions benchmarks/transforms_cpu.jl

This file was deleted.

94 changes: 0 additions & 94 deletions benchmarks/transforms_cpu.py

This file was deleted.

Loading