FluxML · nirmal-suthar · Jul 22, 2020 · Jun 7, 2020 · Jul 15, 2020 · Jul 16, 2020
diff --git a/.gitignore b/.gitignore
@@ -7,4 +7,5 @@
 /examples/results/
 /examples/assets/
 /datasets/modelnet/
+/benchmarks/*.bson
 tmp.jl
diff --git a/benchmarks/transforms.jl b/benchmarks/transforms.jl
@@ -0,0 +1,92 @@
+using Flux3D, BenchmarkTools, BSON
+
+"""
+    setup_benchmark_record(names)
+
+Return a `Dict{String, Vector{Float64}}` holding one independent, empty
+timing vector for every entry of `names`.
+"""
+function setup_benchmark_record(names)
+    # A typed `Float64[]` avoids building a `Vector{Any}` that then has to be
+    # converted on insertion; the generator gives every key its own vector.
+    return Dict{String, Vector{Float64}}(name => Float64[] for name in names)
+end
+
+"""
+    generate_point_cloud(npoints::Int)
+
+Build a deterministic `PointCloud` from a `3 x npoints` matrix whose `j`-th
+column is filled with `j / npoints`.
+"""
+function generate_point_cloud(npoints::Int)
+    # Running sum of ones along the second dimension: column j holds j.
+    ramp = cumsum(ones(3, npoints); dims = 2)
+    return PointCloud(ramp / npoints)
+end
+
+"""
+    run_benchmarks!(benchmarks, x, npoints, benchmark_func, device)
+
+For every `(transform, name)` pair in `x`, move the transform and a freshly
+generated point cloud of `npoints` points through `device`, benchmark
+`benchmark_func(transform, pointcloud)`, print the minimum time and append it
+to `benchmarks[name]`.
+"""
+function run_benchmarks!(benchmarks, x, npoints, benchmark_func, device)
+    for (op, name) in x
+        moved_op = device(op)
+        cloud = device(generate_point_cloud(npoints))
+        trial = @benchmark $benchmark_func($moved_op, $cloud)
+        # Scale the fastest sample by 1.0e-6; the result is reported as ms.
+        best_ms = minimum(trial.times) * 1.0e-6
+        println("$name: $best_ms ms")
+        push!(benchmarks[name], best_ms)
+    end
+end
+
+"""
+    realign_point_cloud(npoints)
+
+Return the point cloud from `generate_point_cloud(npoints)` after applying
+`RotatePointCloud(-ROT_MATRIX)` to it. The benchmark tables pass the result
+to `ReAlignPointCloud`.
+"""
+function realign_point_cloud(npoints)
+    cloud = generate_point_cloud(npoints)
+    return RotatePointCloud(-ROT_MATRIX)(cloud)
+end
+
+# 3x3 matrix shared by the rotate transforms; `realign_point_cloud` reads it
+# as a global.
+ROT_MATRIX = [1.0 2.0 3.0
+              0.2 0.5 0.9
+              3.0 2.0 1.0]
+
+# Point-cloud sizes to benchmark: 2^12, 2^14, ..., 2^20 points.
+npoint_arr = 2 .^ [12, 14, 16, 18, 20]
+
+# Keys of the benchmark records; they must match the labels used in the
+# (transform, label) tuples below.
+names = ["Scale", "Rotate", "Realign", "Normalize"] .* "PointCloud"
+push!(names, "Compose")
+
+cpu_benchmarks = setup_benchmark_record(names)
+
+println("DEVICE: CPU")
+for _npoints in npoint_arr
+    arr = [
+        (ScalePointCloud(0.5; inplace=false), "ScalePointCloud"),
+        (RotatePointCloud(ROT_MATRIX; inplace=false), "RotatePointCloud"),
+        (ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false), "RealignPointCloud"),
+        (NormalizePointCloud(inplace=false), "NormalizePointCloud"),
+        (Compose(
+             ScalePointCloud(0.5; inplace=false),
+             RotatePointCloud(ROT_MATRIX; inplace=false),
+             ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false),
+             # Pass inplace=false explicitly so the CPU pipeline is configured
+             # exactly like the GPU one instead of relying on the constructor
+             # default.
+             NormalizePointCloud(inplace=false)), "Compose")
+    ]
+
+    println("Running benchmarks for npoints = $_npoints")
+    run_benchmarks!(cpu_benchmarks, arr, _npoints, (op, pc) -> op(pc), cpu)
+    println()
+end
+
+# Record for the GPU timings. It is created unconditionally, so it is saved
+# (with empty vectors) even when the CUDA branch below is skipped.
+gpu_benchmarks = setup_benchmark_record(names)
+
+using CUDAapi, CuArrays
+if has_cuda()
+    println("CUDA is on. Running GPU Benchmarks")
+    # NOTE(review): presumably turns scalar indexing of GPU arrays into an
+    # error so slow fallbacks cannot distort the timings - confirm.
+    CuArrays.allowscalar(false)
+    println("DEVICE: GPU")
+    for _npoints in npoint_arr
+        # Wrap each call in CuArrays.@sync before it is handed to the timer.
+        synced_apply = (op, pc) -> (CuArrays.@sync op(pc))
+
+        composed = Compose(
+            ScalePointCloud(0.5; inplace=false),
+            RotatePointCloud(ROT_MATRIX; inplace=false),
+            ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false),
+            NormalizePointCloud(inplace=false),
+        )
+        gpu_cases = [
+            (ScalePointCloud(0.5; inplace=false), "ScalePointCloud"),
+            (RotatePointCloud(ROT_MATRIX; inplace=false), "RotatePointCloud"),
+            (ReAlignPointCloud(realign_point_cloud(_npoints); inplace=false), "RealignPointCloud"),
+            (NormalizePointCloud(inplace=false), "NormalizePointCloud"),
+            (composed, "Compose"),
+        ]
+
+        println("Running benchmarks for npoints = $_npoints")
+        run_benchmarks!(gpu_benchmarks, gpu_cases, _npoints, synced_apply, gpu)
+        println()
+    end
+end
+
+# Persist both records next to this script.
+fname = joinpath(@__DIR__, "transform_benchmarks.bson")
+BSON.@save fname cpu_benchmarks gpu_benchmarks
+@info "Benchmarks have been saved at $fname"
diff --git a/benchmarks/transforms.py b/benchmarks/transforms.py
@@ -0,0 +1,108 @@
+import torch
+import numpy as np
+import kaolin as kal
+import kaolin.transforms as T 
+import time
+
+def setup_benchmark_record(names):
+    benchmarks = {}
+    for name in names:
+        benchmarks[name] = []
+    return benchmarks
+
+def generate_point_cloud(npoints, device='cpu'):
+    points = torch.ones([npoints,3], dtype=torch.float32)
+    points = points.cumsum(dim=0)
+    return kal.rep.PointCloud(points / npoints, device=device)
+
+def realign_point_cloud(npoints, device='cpu'):
+    pc = generate_point_cloud(npoints, device)
+    rot = T.RotatePointCloud(-ROT_MATRIX.to(device))
+    return rot(pc)
+
+def cpu_time(t, p, n_iters=101):
+    benchmark_time = []
+    for i in range(n_iters):
+        start_time = time.time()
+        t(p)
+        end_time = time.time()
+
+        if i is 1: # Ignore first iteration
+            continue
+        benchmark_time.append(end_time - start_time)
+    return benchmark_time
+
+def gpu_time(t, p, n_iters=101):
+    benchmark_time = []
+    for i in range(n_iters):
+        # https://pytorch.org/docs/stable/notes/cuda.html#asynchronous-execution
+        start_event = torch.cuda.Event(enable_timing=True)
+        end_event = torch.cuda.Event(enable_timing=True)
+        start_event.record()
+
+        # Run some things here
+        t(p)
+
+        end_event.record()
+        torch.cuda.synchronize()  # Wait for the events to be recorded!
+        elapsed_time_ms = start_event.elapsed_time(end_event)
+
+        if i is 1: # Ignore first iteration
+            continue
+        benchmark_time.append(elapsed_time_ms/1000)
+    return benchmark_time
+
+def run_benchmarks_(benchmarks, x, npoints, benchmark_func, device):
+    for (transform, name) in x:
+        pc = generate_point_cloud(npoints, device)
+        # bug in kaolin (normalize doesn't accept PointCloud)
+        if name == "NormalizePointCloud":
+            pc = pc.points
+        trial = benchmark_func(transform, pc)
+        time = min(trial) * 1.0e3 # converting second to millisecond
+        print("{}: {} ms".format(name, time))
+        benchmarks[name].append(time)
+
+ROT_MATRIX = torch.tensor([[1.0, 2.0, 3.0],
+                           [0.2, 0.5, 0.9],
+                           [3.0, 2.0, 1.0]])
+
+npoint_arr = 2 ** np.array([12, 14, 16, 18, 20])
+
+names = ["ScalePointCloud", "RotatePointCloud",
+         "RealignPointCloud", "NormalizePointCloud", "Compose"]
+
+print("DEVICE: CPU")
+device = "cpu"
+cpu_benchmarks = setup_benchmark_record(names)
+
+for _npoints in npoint_arr:
+    arr = [(T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), "ScalePointCloud"),
+       (T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False), "RotatePointCloud"),
+       (T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), "RealignPointCloud"),
+       (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"),
+       (T.Compose([T.ScalePointCloud(torch.Tensor([.5], inplace=False).to(device)),
+                  T.RotatePointCloud(torch.randn(3,3).to(device), inplace=False),
+                  T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False),
+                  T.NormalizePointCloud(inplace=False)]), "Compose")]
+    print("Running benchmarks for npoints = {}".format(_npoints))
+    run_benchmarks_(cpu_benchmarks, arr, _npoints, cpu_time, device)
+    print()
+
+if torch.cuda.is_available():
+    print("CUDA is on. Running GPU Benchmarks")
+    print("DEVICE: GPU")
+    device = "cuda"
+    gpu_benchmarks = setup_benchmark_record(names)
+    for _npoints in npoint_arr:
+        arr = [(T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), "ScalePointCloud"),
+           (T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False), "RotatePointCloud"),
+           (T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), "RealignPointCloud"),
+           (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"),
+           (T.Compose([T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False),
+                      T.RotatePointCloud(torch.randn(3,3).to(device), inplace=False),
+                      T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False),
+                      T.NormalizePointCloud(inplace=False)]), "Compose")]
+        print("Running benchmarks for npoints = {}".format(_npoints))
+        run_benchmarks_(gpu_benchmarks, arr, _npoints, gpu_time, device)
+        print()
diff --git a/benchmarks/transforms_cpu.jl b/benchmarks/transforms_cpu.jl
diff --git a/benchmarks/transforms_cpu.py b/benchmarks/transforms_cpu.py