less unsafe replace StagingWriteBeltBufferTyped with StagingWriteBelt…

…BufferTypedView
rerun-io · Dec 19, 2022 · 40013a3 · 40013a3 · github-actions · Dec 19, 2022
1 parent ac4b395
commit 40013a3
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 76 deletions.
diff --git a/crates/re_renderer/src/point_cloud_builder.rs b/crates/re_renderer/src/point_cloud_builder.rs
@@ -1,10 +1,8 @@
-use std::ops::Range;
-
 use crate::{
     renderer::{
         PointCloudBatchInfo, PointCloudDrawData, PointCloudDrawDataError, PointCloudVertex,
     },
-    staging_write_belt::StagingWriteBeltBufferTyped,
+    staging_write_belt::{StagingWriteBeltBuffer, StagingWriteBeltBufferTypedView},
     Color32, DebugLabel, RenderContext, Size,
 };
 
@@ -22,8 +20,8 @@ pub struct PointCloudBuilder<PerPointUserData> {
 
     pub batches: Vec<PointCloudBatchInfo>,
 
-    pub(crate) vertices_gpu: StagingWriteBeltBufferTyped<PointCloudVertex>,
-    pub(crate) colors_gpu: StagingWriteBeltBufferTyped<Color32>,
+    pub(crate) vertices_gpu: StagingWriteBeltBuffer,
+    pub(crate) colors_gpu: StagingWriteBeltBuffer,
 
     /// z value given to the next 2d point.
     pub next_2d_z: f32,
@@ -37,25 +35,21 @@ where
         // TODO: check max_num_points bound
 
         let mut staging_belt = ctx.staging_belt.lock();
-        let vertices_gpu = staging_belt
-            .allocate(
-                &ctx.device,
-                &mut ctx.gpu_resources.buffers,
-                (std::mem::size_of::<PointCloudVertex>() * max_num_points) as wgpu::BufferAddress,
-                wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as u64,
-            )
-            .typed_view();
-        let mut colors_gpu = staging_belt
-            .allocate(
-                &ctx.device,
-                &mut ctx.gpu_resources.buffers,
-                (std::mem::size_of::<PointCloudVertex>() * max_num_points) as wgpu::BufferAddress,
-                wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as u64,
-            )
-            .typed_view::<Color32>();
+        let vertices_gpu = staging_belt.allocate(
+            &ctx.device,
+            &mut ctx.gpu_resources.buffers,
+            (std::mem::size_of::<PointCloudVertex>() * max_num_points) as wgpu::BufferAddress,
+            wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as u64,
+        );
+        let mut colors_gpu = staging_belt.allocate(
+            &ctx.device,
+            &mut ctx.gpu_resources.buffers,
+            (std::mem::size_of::<PointCloudVertex>() * max_num_points) as wgpu::BufferAddress,
+            wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as u64,
+        );
         // Default unassigned colors to white.
         // TODO(andreas): Do we actually need this? Can we do this lazily if no color was specified?
-        colors_gpu.buffer.memset(255);
+        colors_gpu.memset(255);
 
         Self {
             vertices: Vec::with_capacity(max_num_points),
@@ -138,11 +132,14 @@ where
         self.0
             .user_data
             .extend(std::iter::repeat(PerPointUserData::default()).take(num_points));
-        let num_vertices = self.0.vertices.len();
+
+        let new_range = old_size..self.0.vertices.len();
 
         PointsBuilder {
-            builder: self.0,
-            range: old_size..num_vertices,
+            vertices: &mut self.0.vertices[new_range.clone()],
+            vertices_gpu: self.0.vertices_gpu.typed_view(old_size),
+            colors_gpu: self.0.colors_gpu.typed_view(old_size),
+            user_data: &mut self.0.user_data[new_range],
         }
     }
 
@@ -161,7 +158,7 @@ where
     pub fn add_point(&mut self, position: glam::Vec3) -> PointBuilder<'_, PerPointUserData> {
         debug_assert_eq!(self.0.vertices.len(), self.0.user_data.len());
 
-        let num_points_before = self.0.vertices.len();
+        let old_size = self.0.vertices.len();
         let vertex = PointCloudVertex {
             position,
             radius: Size::AUTO,
@@ -172,8 +169,10 @@ where
         self.batch_mut().point_count += 1;
 
         PointBuilder {
-            builder: self.0,
-            offset: num_points_before,
+            vertex: self.0.vertices.last_mut().unwrap(),
+            vertices_gpu: self.0.vertices_gpu.typed_view(old_size),
+            colors_gpu: self.0.colors_gpu.typed_view(old_size),
+            user_data: self.0.user_data.last_mut().unwrap(),
         }
     }
 
@@ -198,8 +197,10 @@ where
 }
 
 pub struct PointBuilder<'a, PerPointUserData> {
-    builder: &'a mut PointCloudBuilder<PerPointUserData>,
-    offset: usize,
+    vertex: &'a mut PointCloudVertex,
+    colors_gpu: StagingWriteBeltBufferTypedView<'a, Color32>,
+    vertices_gpu: StagingWriteBeltBufferTypedView<'a, PointCloudVertex>,
+    user_data: &'a mut PerPointUserData,
 }
 
 impl<'a, PerPointUserData> PointBuilder<'a, PerPointUserData>
@@ -208,33 +209,34 @@ where
 {
     #[inline]
     pub fn radius(self, radius: Size) -> Self {
-        self.builder.vertices[self.offset].radius = radius;
+        self.vertex.radius = radius;
         self
     }
 
     #[inline]
-    pub fn color(self, color: Color32) -> Self {
-        self.builder.colors_gpu.write_single(&color, self.offset);
+    pub fn color(mut self, color: Color32) -> Self {
+        self.colors_gpu.write_single(&color, 0);
         self
     }
 
     pub fn user_data(self, data: PerPointUserData) -> Self {
-        self.builder.user_data[self.offset] = data;
+        *self.user_data = data;
         self
     }
 }
 
 impl<'a, PerPointUserData> Drop for PointBuilder<'a, PerPointUserData> {
+    #[inline]
     fn drop(&mut self) {
-        self.builder
-            .vertices_gpu
-            .write_single(&self.builder.vertices[self.offset], self.offset);
+        self.vertices_gpu.write_single(self.vertex, 0);
     }
 }
 
 pub struct PointsBuilder<'a, PerPointUserData> {
-    builder: &'a mut PointCloudBuilder<PerPointUserData>,
-    range: Range<usize>,
+    vertices: &'a mut [PointCloudVertex],
+    colors_gpu: StagingWriteBeltBufferTypedView<'a, Color32>,
+    vertices_gpu: StagingWriteBeltBufferTypedView<'a, PointCloudVertex>,
+    user_data: &'a mut [PerPointUserData],
 }
 
 impl<'a, PerPointUserData> PointsBuilder<'a, PerPointUserData>
@@ -244,43 +246,37 @@ where
     /// Splats a radius to all points in this builder.
     #[inline]
     pub fn radius(self, radius: Size) -> Self {
-        for point in &mut self.builder.vertices[self.range.clone()] {
+        for point in self.vertices.iter_mut() {
             point.radius = radius;
         }
         self
     }
 
     /// Assigns radii to all points.
     ///
-    /// The slice is required to cover all points.
+    /// If the iterator doesn't cover all points, some will not be assigned.
+    /// If the iterator provides more values than there are points, the extra values will be ignored.
     #[inline]
     pub fn radii(self, radii: &[Size]) -> Self {
-        debug_assert_eq!(radii.len(), self.range.len());
-        for (point, radius) in self.builder.vertices[self.range.clone()]
-            .iter_mut()
-            .zip(radii)
-        {
+        for (point, radius) in self.vertices.iter_mut().zip(radii.iter()) {
             point.radius = *radius;
         }
         self
     }
 
     /// Splats a color to all points in this builder.
     #[inline]
-    pub fn color(self, color: Color32) -> Self {
-        for offset in self.range.clone() {
-            self.builder.colors_gpu.write_single(&color, offset);
-        }
+    pub fn color(mut self, color: Color32) -> Self {
+        self.colors_gpu.fill(&color);
         self
     }
 
     /// Assigns colors to all points.
     ///
     /// The slice is required to cover all points.
     #[inline]
-    pub fn colors(self, colors: &[Color32]) -> Self {
-        debug_assert_eq!(colors.len(), self.range.len());
-        self.builder.colors_gpu.write(colors, self.range.start);
+    pub fn colors(mut self, colors: &[Color32]) -> Self {
+        self.colors_gpu.write(colors, 0);
         self
     }
 
@@ -289,17 +285,16 @@ where
     /// User data is currently not available on the GPU.
     #[inline]
     pub fn user_data(self, data: PerPointUserData) -> Self {
-        for user_data in &mut self.builder.user_data[self.range.clone()] {
+        for user_data in self.user_data.iter_mut() {
             *user_data = data.clone();
         }
         self
     }
 }
 
 impl<'a, PerPointUserData> Drop for PointsBuilder<'a, PerPointUserData> {
+    #[inline]
     fn drop(&mut self) {
-        self.builder
-            .vertices_gpu
-            .write(&self.builder.vertices[self.range.clone()], self.range.start);
+        self.vertices_gpu.write(self.vertices, 0);
     }
 }
diff --git a/crates/re_renderer/src/renderer/point_cloud.rs b/crates/re_renderer/src/renderer/point_cloud.rs
@@ -218,7 +218,7 @@ impl PointCloudDrawData {
             depth_or_array_layers: 1,
         };
 
-        vertices_gpu.buffer.copy_to_texture(
+        vertices_gpu.copy_to_texture(
             &mut ctx.frame_global_commands,
             wgpu::ImageCopyTexture {
                 texture: &ctx
@@ -239,7 +239,7 @@ impl PointCloudDrawData {
             size,
         );
 
-        colors_gpu.buffer.copy_to_texture(
+        colors_gpu.copy_to_texture(
             &mut ctx.frame_global_commands,
             wgpu::ImageCopyTexture {
                 texture: &ctx

diff --git a/crates/re_renderer/src/staging_write_belt.rs b/crates/re_renderer/src/staging_write_belt.rs
@@ -23,12 +23,11 @@ pub struct StagingWriteBeltBuffer {
     offset_in_chunk: wgpu::BufferAddress,
 }
 
-pub struct StagingWriteBeltBufferTyped<T: bytemuck::Pod + 'static> {
-    pub buffer: StagingWriteBeltBuffer,
-    memory: &'static mut [T],
+pub struct StagingWriteBeltBufferTypedView<'a, T: bytemuck::Pod> {
+    write_only_memory: &'a mut [T],
 }
 
-impl<T> StagingWriteBeltBufferTyped<T>
+impl<'a, T> StagingWriteBeltBufferTypedView<'a, T>
 where
     T: bytemuck::Pod + 'static,
 {
@@ -40,7 +39,7 @@ where
     /// Reading would work, but it can be *insanely* slow.
     #[inline]
     pub fn write(&mut self, elements: &[T], offset_in_element_sizes: usize) {
-        self.memory[offset_in_element_sizes..(offset_in_element_sizes + elements.len())]
+        self.write_only_memory[offset_in_element_sizes..(offset_in_element_sizes + elements.len())]
             .copy_from_slice(elements);
     }
 
@@ -49,23 +48,27 @@ where
     /// (panics otherwise)
     #[inline]
     pub fn write_single(&mut self, element: &T, offset_in_element_sizes: usize) {
-        self.memory[offset_in_element_sizes] = *element;
+        self.write_only_memory[offset_in_element_sizes] = *element;
+    }
+
+    /// Overwrites all elements in the buffer with a copy of the given value.
+    pub fn fill(&mut self, element: &T) {
+        for buffer_element in self.write_only_memory.iter_mut() {
+            *buffer_element = *element;
+        }
     }
 }
 
 impl StagingWriteBeltBuffer {
     #[allow(unsafe_code)]
-    pub fn typed_view<T: bytemuck::Pod + 'static>(mut self) -> StagingWriteBeltBufferTyped<T> {
-        let view_ptr = &mut self.write_view as *mut wgpu::BufferViewMut<'static>;
-
-        // SAFETY:
-        // The memory pointer lifes as long as the view since we store the view into StagingWriteBeltBufferTyped as well.
-        let static_view = Box::leak(unsafe { Box::from_raw(view_ptr) });
-
-        let memory = bytemuck::cast_slice_mut(static_view);
-        StagingWriteBeltBufferTyped {
-            buffer: self,
-            memory,
+    #[inline]
+    pub fn typed_view<T: bytemuck::Pod + 'static>(
+        &mut self,
+        offset_in_elements: usize,
+    ) -> StagingWriteBeltBufferTypedView<'_, T> {
+        StagingWriteBeltBufferTypedView {
+            write_only_memory: &mut bytemuck::cast_slice_mut(&mut self.write_view)
+                [offset_in_elements..],
         }
     }
Benchmark suite	Current: `40013a3`	Previous: `2bff8f8`	Ratio
`datastore/batch/rects/insert`	`1682309` ns/iter (`± 17383`)	`1746658` ns/iter (`± 14377`)	`0.96`
`datastore/batch/rects/query`	`686` ns/iter (`± 2`)	`695` ns/iter (`± 0`)	`0.99`
`obj_mono_points/insert`	`876467093` ns/iter (`± 5622325`)	`886223811` ns/iter (`± 22138001`)	`0.99`
`obj_mono_points/query`	`324845` ns/iter (`± 1450`)	`344080` ns/iter (`± 5884`)	`0.94`
`obj_batch_points/insert`	`86235229` ns/iter (`± 350090`)	`85945675` ns/iter (`± 579779`)	`1.00`
`obj_batch_points/query`	`11326` ns/iter (`± 31`)	`11317` ns/iter (`± 10`)	`1.00`
`obj_batch_points_sequential/insert`	`22726142` ns/iter (`± 228780`)	`23099793` ns/iter (`± 153931`)	`0.98`
`obj_batch_points_sequential/query`	`7759` ns/iter (`± 18`)	`7813` ns/iter (`± 23`)	`0.99`
`arrow_mono_points/insert`	`242255112` ns/iter (`± 609547`)	`242678745` ns/iter (`± 1223451`)	`1.00`
`arrow_mono_points/query`	`715327` ns/iter (`± 2195`)	`721104` ns/iter (`± 2236`)	`0.99`
`arrow_batch_points/insert`	`978977` ns/iter (`± 3365`)	`966455` ns/iter (`± 2532`)	`1.01`
`arrow_batch_points/query`	`11299` ns/iter (`± 138`)	`11373` ns/iter (`± 76`)	`0.99`
`obj_batch_points_sequential/Tuid::random`	`37` ns/iter (`± 0`)	`37` ns/iter (`± 0`)	`1`