FluxML · nirmal-suthar · May 14, 2020 · May 11, 2020 · May 11, 2020 · May 11, 2020
diff --git a/Manifest.toml b/Manifest.toml
diff --git a/Project.toml b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Nirmal P. Suthar <nirmalps@iitk.ac.in>"]
 version = "0.1.0"
 
 [deps]
+CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"

diff --git a/README.md b/README.md
@@ -5,13 +5,28 @@
 [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://nirmal-suthar.github.io/Flux3D.jl/stable)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://nirmal-suthar.github.io/Flux3D.jl/dev)
 
-| transforms | Framework | benchmark_time | 
-|:--:|:--:|:--:|
-|ScalePointCloud|Flux3D|__0.0000039 s__|
-||Kaolin| 0.0000222 s|
-|RotatePointCloud|Flux3D|0.0000409 s|
-||Kaolin|__0.0000312 s__|
-|ReAlignPointCloud|Flux3D|__0.0002318 s__|
-||Kaolin|0.0016832 s|
-|NormalizePointCloud|Flux3D|__0.0000715 s__|
-||Kaolin|0.0008790 s|
+## CPU Benchmarks [Google Colab, runtime:GPU]
+
+| transforms | Framework | npoints=2<sup>14</sup> | npoints=2<sup>16</sup> | npoints=2<sup>20</sup> |
+|:--:|:--:|:--:|:--:|:--:|
+|ScalePointCloud|Flux3D|__0.0032 ms__|__0.0128 ms__|__0.4952 ms__|
+||Kaolin|0.0224 ms|0.3638 ms|1.9645 ms|
+|RotatePointCloud|Flux3D|__0.0348 ms__|__0.1920 ms__|3.5250 ms|
+||Kaolin|0.0379 ms|0.1957 ms|__2.8698 ms__|
+|ReAlignPointCloud|Flux3D|__0.1710 ms__|__0.7320 ms__|__11.824 ms__|
+||Kaolin|1.6744 ms|7.2798 ms|111.22 ms|
+|NormalizePointCloud|Flux3D|__0.0813 ms__|__0.3930 ms__|__7.8250 ms__|
+||Kaolin|0.8723 ms|3.8008 ms|57.468 ms|
+
+## GPU Benchmarks [Google Colab, runtime:GPU]
+
+| transforms | Framework | npoints=2<sup>14</sup> | npoints=2<sup>16</sup> | npoints=2<sup>20</sup> |
+|:--:|:--:|:--:|:--:|:--:|
+|ScalePointCloud|Flux3D|__0.0350 ms__|__0.0423 ms__|__0.1448 ms__|
+||Kaolin|0.0918 ms|0.0731 ms|0.1634 ms|
+|RotatePointCloud|Flux3D|__0.0236 ms__|__0.0313 ms__|__0.2227 ms__|
+||Kaolin|0.0409 ms|0.0396 ms|0.3421 ms|
+|ReAlignPointCloud|Flux3D|__0.7195 ms__|__0.7083 ms__|__1.0020 ms__|
+||Kaolin|3.2031 ms|12.607 ms|330.80 ms|
+|NormalizePointCloud|Flux3D|1.3030 ms|__1.4050 ms__|__1.6380 ms__|
+||Kaolin|__0.9214 ms__|3.6641 ms|57.498 ms|
diff --git a/benchmarks/transforms.jl b/benchmarks/transforms.jl
diff --git a/benchmarks/transforms.py b/benchmarks/transforms.py
diff --git a/benchmarks/transforms_cpu.jl b/benchmarks/transforms_cpu.jl
@@ -0,0 +1,75 @@
+using Flux3D, BenchmarkTools
+
+function run_benchmarks_cpu(arr, npoints)  # times each (transform, label) pair in `arr`
+    for (transform, label) in arr
+        println("*"^10, " $(label) ", "*"^10)  # banner naming the transform being timed
+        cloud = PointCloud(rand(npoints, 3))  # fresh random npoints×3 input per transform
+        @btime $transform($cloud)  # `$` interpolates the locals into the benchmark
+    end
+end
+
+# Benchmark every transform at each point count. The list is rebuilt for every size
+# because ReAlignPointCloud is constructed from a random PointCloud of `npoints` points.
+for npoints in (2^14, 2^16, 2^18, 2^20)
+    transforms = [
+        (ScalePointCloud(0.5), "ScalePointCloud"),
+        (RotatePointCloud(rand(3, 3)), "RotatePointCloud"),
+        (ReAlignPointCloud(PointCloud(rand(npoints, 3))), "ReAlignPointCloud"),
+        (NormalizePointCloud(), "NormalizePointCloud"),
+        (Compose(ScalePointCloud(0.5), RotatePointCloud(rand(3, 3)),
+            ReAlignPointCloud(PointCloud(rand(npoints, 3))), NormalizePointCloud()), "Compose"),
+    ]
+    println("*"^10, " npoints = $(npoints), device = cpu ", "*"^10)
+    run_benchmarks_cpu(transforms, npoints)
+    println()
+end
+
+# Google Colab output [runtime: GPU]
+
+# ********** npoints = 16384, device = cpu **********
+# ********** ScalePointCloud **********
+#   3.212 μs (1 allocation: 16 bytes)
+# ********** RotatePointCloud **********
+#   34.843 μs (2 allocations: 192.08 KiB)
+# ********** ReAlignPointCloud **********
+#   171.015 μs (36 allocations: 193.13 KiB)
+# ********** NormalizePointCloud **********
+#   81.388 μs (14 allocations: 192.58 KiB)
+# ********** Compose **********
+#   295.170 μs (54 allocations: 577.83 KiB)
+
+# ********** npoints = 65536, device = cpu **********
+# ********** ScalePointCloud **********
+#   12.812 μs (1 allocation: 16 bytes)
+# ********** RotatePointCloud **********
+#   192.015 μs (2 allocations: 768.08 KiB)
+# ********** ReAlignPointCloud **********
+#   732.088 μs (36 allocations: 769.13 KiB)
+# ********** NormalizePointCloud **********
+#   393.066 μs (14 allocations: 768.58 KiB)
+# ********** Compose **********
+#   2.238 ms (54 allocations: 2.25 MiB)
+
+# ********** npoints = 262144, device = cpu **********
+# ********** ScalePointCloud **********
+#   117.308 μs (1 allocation: 16 bytes)
+# ********** RotatePointCloud **********
+#   836.519 μs (2 allocations: 3.00 MiB)
+# ********** ReAlignPointCloud **********
+#   2.910 ms (36 allocations: 3.00 MiB)
+# ********** NormalizePointCloud **********
+#   1.863 ms (14 allocations: 3.00 MiB)
+# ********** Compose **********
+#   9.166 ms (54 allocations: 9.00 MiB)
+
+# ********** npoints = 1048576, device = cpu **********
+# ********** ScalePointCloud **********
+#   495.236 μs (1 allocation: 16 bytes)
+# ********** RotatePointCloud **********
+#   3.525 ms (2 allocations: 12.00 MiB)
+# ********** ReAlignPointCloud **********
+#   11.824 ms (36 allocations: 12.00 MiB)
+# ********** NormalizePointCloud **********
+#   7.825 ms (14 allocations: 12.00 MiB)
+# ********** Compose **********
+#   37.393 ms (54 allocations: 36.00 MiB)
diff --git a/benchmarks/transforms_cpu.py b/benchmarks/transforms_cpu.py
@@ -0,0 +1,94 @@
+import torch, time
+import kaolin as kal
+import kaolin.transforms as T 
+
+def run_benchmarks_cpu(arr, npoints, n_iters):
+
+    for (t, name) in arr:
+        print("*"*10, "Transforms {}".format(name), "*"*10)
+        benchmark_time = []
+
+        points = torch.randn(npoints, 3)
+        p = kal.rep.PointCloud(points, device='cpu')
+
+        # bug in kaolin (normalize doesn't accept PointCloud)
+        if name == "NormalizePointCloud":
+            p = points
+
+        for i in range(n_iters):
+
+            start_time = time.time()
+            t(p)
+            end_time = time.time()
+
+            if i is 1: # Ignore first iteration
+                continue
+            benchmark_time.append(end_time - start_time)
+
+        print("Benchmark Time : {}".format(min(benchmark_time)))
+
+n_iters = 101
+device = torch.device('cpu')
+
+for npoints in [2**14, 2**16, 2**18, 2**20]:
+    arr = [(T.ScalePointCloud(torch.Tensor([.5]).to(device=device)), "ScalePointCloud"),
+       (T.RotatePointCloud(torch.randn(3,3).to(device)), "RotatePointCloud"),
+       (T.RealignPointCloud(kal.rep.PointCloud(torch.randn(npoints,3), device=device)), "ReAlignPointCloud"),
+       (T.NormalizePointCloud(), "NormalizePointCloud"),
+       (T.Compose([T.ScalePointCloud(torch.Tensor([.5]).to(device)),
+                  T.RotatePointCloud(torch.randn(3,3).to(device)),
+                  T.RealignPointCloud(kal.rep.PointCloud(torch.randn(npoints,3), device=device)),
+                  T.NormalizePointCloud()]), "Compose")]
+    print("*"*10, "npoints = {}, device = {}".format(npoints, device), "*"*10)
+    run_benchmarks_cpu(arr, npoints, n_iters, device=device)    
+    print()
+
+# Google Colab output [runtime: GPU]
+
+# ********** npoints = 16384, device = cpu **********
+# ********** Transforms ScalePointCloud **********
+# Benchmark Time : 2.2411346435546875e-05
+# ********** Transforms RotatePointCloud **********
+# Benchmark Time : 3.790855407714844e-05
+# ********** Transforms ReAlignPointCloud **********
+# Benchmark Time : 0.0016744136810302734
+# ********** Transforms NormalizePointCloud **********
+# Benchmark Time : 0.0008723735809326172
+# ********** Transforms Compose **********
+# Benchmark Time : 0.001056671142578125
+
+# ********** npoints = 65536, device = cpu **********
+# ********** Transforms ScalePointCloud **********
+# Benchmark Time : 0.0003638267517089844
+# ********** Transforms RotatePointCloud **********
+# Benchmark Time : 0.0001957416534423828
+# ********** Transforms ReAlignPointCloud **********
+# Benchmark Time : 0.007279872894287109
+# ********** Transforms NormalizePointCloud **********
+# Benchmark Time : 0.0038008689880371094
+# ********** Transforms Compose **********
+# Benchmark Time : 0.0050737857818603516
+
+# ********** npoints = 262144, device = cpu **********
+# ********** Transforms ScalePointCloud **********
+# Benchmark Time : 0.0006532669067382812
+# ********** Transforms RotatePointCloud **********
+# Benchmark Time : 0.0007166862487792969
+# ********** Transforms ReAlignPointCloud **********
+# Benchmark Time : 0.027949810028076172
+# ********** Transforms NormalizePointCloud **********
+# Benchmark Time : 0.014463663101196289
+# ********** Transforms Compose **********
+# Benchmark Time : 0.0179598331451416
+
+# ********** npoints = 1048576, device = cpu **********
+# ********** Transforms ScalePointCloud **********
+# Benchmark Time : 0.001964569091796875
+# ********** Transforms RotatePointCloud **********
+# Benchmark Time : 0.002869844436645508
+# ********** Transforms ReAlignPointCloud **********
+# Benchmark Time : 0.11122250556945801
+# ********** Transforms NormalizePointCloud **********
+# Benchmark Time : 0.05746889114379883
+# ********** Transforms Compose **********
+# Benchmark Time : 0.07275104522705078