Skip to content

Commit 2cd0bd7

Browse files
committed
improve compile time by type-erasing wgpu structs (#5950)
# Objective

Structs containing wgpu types take a long time to compile. This is particularly bad for generics containing the wgpu structs (like the depth pipeline builder with `#[derive(SystemParam)]` I've been working on).

We can avoid that by boxing and type-erasing in the bevy `render_resource` wrappers.

Type system magic is not a strength of mine, so I guess there will be a cleaner way to achieve this; happy to take feedback, or for it to be taken as a proof of concept if someone else wants to do a better job.

## Solution

- add macros to box and type-erase in debug mode
- leave current impl for release mode

Timings:

| current | time |
| -- | -- |
| Total time: | 64.9s |
| bevy_pbr v0.9.0-dev | 19.2s |
| bevy_render v0.9.0-dev | 17.0s |
| bevy_sprite v0.9.0-dev | 15.1s |
| DepthPipelineBuilder | 18.7s |

| with type-erasing | time | diff |
| -- | -- | -- |
| Total time: | 49.0s | -24% |
| bevy_render v0.9.0-dev | 12.0s | -38% |
| bevy_pbr v0.9.0-dev | 8.7s | -49% |
| bevy_sprite v0.9.0-dev | 6.1s | -60% |
| DepthPipelineBuilder | 1.2s | -94% |

The depth pipeline builder is a binary with body:

```rust
use std::{marker::PhantomData, hash::Hash};
use bevy::{
    prelude::*,
    ecs::system::SystemParam,
    pbr::{RenderMaterials, MaterialPipeline, ShadowPipeline},
    render::{
        renderer::RenderDevice,
        render_resource::{SpecializedMeshPipelines, PipelineCache},
        render_asset::RenderAssets,
    },
};

fn main() {
    println!("Hello, world p!\n");
}

#[derive(SystemParam)]
pub struct DepthPipelineBuilder<'w, 's, M: Material>
where
    M::Data: Eq + Hash + Clone,
{
    render_device: Res<'w, RenderDevice>,
    material_pipeline: Res<'w, MaterialPipeline<M>>,
    material_pipelines: ResMut<'w, SpecializedMeshPipelines<MaterialPipeline<M>>>,
    shadow_pipeline: Res<'w, ShadowPipeline>,
    pipeline_cache: ResMut<'w, PipelineCache>,
    render_meshes: Res<'w, RenderAssets<Mesh>>,
    render_materials: Res<'w, RenderMaterials<M>>,
    msaa: Res<'w, Msaa>,
    #[system_param(ignore)]
    _p: PhantomData<&'s M>,
}
```
1 parent 5972879 commit 2cd0bd7

File tree

10 files changed

+202
-47
lines changed

10 files changed

+202
-47
lines changed

crates/bevy_render/src/render_resource/bind_group.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@ use crate::{
99
texture::FallbackImage,
1010
};
1111
use bevy_reflect::Uuid;
12-
use std::{ops::Deref, sync::Arc};
12+
use std::ops::Deref;
1313
use wgpu::BindingResource;
1414

15+
use crate::render_resource::resource_macros::*;
16+
render_resource_wrapper!(ErasedBindGroup, wgpu::BindGroup);
17+
1518
/// A [`BindGroup`] identifier.
1619
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
1720
pub struct BindGroupId(Uuid);
@@ -25,7 +28,7 @@ pub struct BindGroupId(Uuid);
2528
#[derive(Clone, Debug)]
2629
pub struct BindGroup {
2730
id: BindGroupId,
28-
value: Arc<wgpu::BindGroup>,
31+
value: ErasedBindGroup,
2932
}
3033

3134
impl BindGroup {
@@ -40,7 +43,7 @@ impl From<wgpu::BindGroup> for BindGroup {
4043
fn from(value: wgpu::BindGroup) -> Self {
4144
BindGroup {
4245
id: BindGroupId(Uuid::new_v4()),
43-
value: Arc::new(value),
46+
value: ErasedBindGroup::new(value),
4447
}
4548
}
4649
}

crates/bevy_render/src/render_resource/bind_group_layout.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
use crate::render_resource::resource_macros::*;
12
use bevy_reflect::Uuid;
2-
use std::{ops::Deref, sync::Arc};
3+
use std::ops::Deref;
34

45
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
56
pub struct BindGroupLayoutId(Uuid);
67

8+
render_resource_wrapper!(ErasedBindGroupLayout, wgpu::BindGroupLayout);
9+
710
#[derive(Clone, Debug)]
811
pub struct BindGroupLayout {
912
id: BindGroupLayoutId,
10-
value: Arc<wgpu::BindGroupLayout>,
13+
value: ErasedBindGroupLayout,
1114
}
1215

1316
impl PartialEq for BindGroupLayout {
@@ -32,7 +35,7 @@ impl From<wgpu::BindGroupLayout> for BindGroupLayout {
3235
fn from(value: wgpu::BindGroupLayout) -> Self {
3336
BindGroupLayout {
3437
id: BindGroupLayoutId(Uuid::new_v4()),
35-
value: Arc::new(value),
38+
value: ErasedBindGroupLayout::new(value),
3639
}
3740
}
3841
}

crates/bevy_render/src/render_resource/buffer.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
use bevy_utils::Uuid;
2-
use std::{
3-
ops::{Bound, Deref, RangeBounds},
4-
sync::Arc,
5-
};
2+
use std::ops::{Bound, Deref, RangeBounds};
3+
4+
use crate::render_resource::resource_macros::*;
65

76
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
87
pub struct BufferId(Uuid);
98

9+
render_resource_wrapper!(ErasedBuffer, wgpu::Buffer);
10+
1011
#[derive(Clone, Debug)]
1112
pub struct Buffer {
1213
id: BufferId,
13-
value: Arc<wgpu::Buffer>,
14+
value: ErasedBuffer,
1415
}
1516

1617
impl Buffer {
@@ -42,7 +43,7 @@ impl From<wgpu::Buffer> for Buffer {
4243
fn from(value: wgpu::Buffer) -> Self {
4344
Buffer {
4445
id: BufferId(Uuid::new_v4()),
45-
value: Arc::new(value),
46+
value: ErasedBuffer::new(value),
4647
}
4748
}
4849
}

crates/bevy_render/src/render_resource/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mod buffer_vec;
55
mod pipeline;
66
mod pipeline_cache;
77
mod pipeline_specializer;
8+
pub mod resource_macros;
89
mod shader;
910
mod storage_buffer;
1011
mod texture;

crates/bevy_render/src/render_resource/pipeline.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
11
use crate::render_resource::{BindGroupLayout, Shader};
22
use bevy_asset::Handle;
33
use bevy_reflect::Uuid;
4-
use std::{borrow::Cow, ops::Deref, sync::Arc};
4+
use std::{borrow::Cow, ops::Deref};
55
use wgpu::{
66
BufferAddress, ColorTargetState, DepthStencilState, MultisampleState, PrimitiveState,
77
VertexAttribute, VertexFormat, VertexStepMode,
88
};
99

10+
use crate::render_resource::resource_macros::*;
11+
1012
/// A [`RenderPipeline`] identifier.
1113
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
1214
pub struct RenderPipelineId(Uuid);
1315

16+
render_resource_wrapper!(ErasedRenderPipeline, wgpu::RenderPipeline);
17+
1418
/// A [`RenderPipeline`] represents a graphics pipeline and its stages (shaders), bindings and vertex buffers.
1519
///
1620
/// May be converted from and dereferences to a wgpu [`RenderPipeline`](wgpu::RenderPipeline).
1721
/// Can be created via [`RenderDevice::create_render_pipeline`](crate::renderer::RenderDevice::create_render_pipeline).
1822
#[derive(Clone, Debug)]
1923
pub struct RenderPipeline {
2024
id: RenderPipelineId,
21-
value: Arc<wgpu::RenderPipeline>,
25+
value: ErasedRenderPipeline,
2226
}
2327

2428
impl RenderPipeline {
@@ -32,7 +36,7 @@ impl From<wgpu::RenderPipeline> for RenderPipeline {
3236
fn from(value: wgpu::RenderPipeline) -> Self {
3337
RenderPipeline {
3438
id: RenderPipelineId(Uuid::new_v4()),
35-
value: Arc::new(value),
39+
value: ErasedRenderPipeline::new(value),
3640
}
3741
}
3842
}
@@ -50,14 +54,16 @@ impl Deref for RenderPipeline {
5054
#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
5155
pub struct ComputePipelineId(Uuid);
5256

57+
render_resource_wrapper!(ErasedComputePipeline, wgpu::ComputePipeline);
58+
5359
/// A [`ComputePipeline`] represents a compute pipeline and its single shader stage.
5460
///
5561
/// May be converted from and dereferences to a wgpu [`ComputePipeline`](wgpu::ComputePipeline).
5662
/// Can be created via [`RenderDevice::create_compute_pipeline`](crate::renderer::RenderDevice::create_compute_pipeline).
5763
#[derive(Clone, Debug)]
5864
pub struct ComputePipeline {
5965
id: ComputePipelineId,
60-
value: Arc<wgpu::ComputePipeline>,
66+
value: ErasedComputePipeline,
6167
}
6268

6369
impl ComputePipeline {
@@ -72,7 +78,7 @@ impl From<wgpu::ComputePipeline> for ComputePipeline {
7278
fn from(value: wgpu::ComputePipeline) -> Self {
7379
ComputePipeline {
7480
id: ComputePipelineId(Uuid::new_v4()),
75-
value: Arc::new(value),
81+
value: ErasedComputePipeline::new(value),
7682
}
7783
}
7884
}

crates/bevy_render/src/render_resource/pipeline_cache.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@ use bevy_utils::{
1717
tracing::{debug, error},
1818
Entry, HashMap, HashSet,
1919
};
20-
use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref, sync::Arc};
20+
use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref};
2121
use thiserror::Error;
2222
use wgpu::{
23-
BufferBindingType, PipelineLayoutDescriptor, ShaderModule,
24-
VertexBufferLayout as RawVertexBufferLayout,
23+
BufferBindingType, PipelineLayoutDescriptor, VertexBufferLayout as RawVertexBufferLayout,
2524
};
2625

26+
use crate::render_resource::resource_macros::*;
27+
28+
render_resource_wrapper!(ErasedShaderModule, wgpu::ShaderModule);
29+
render_resource_wrapper!(ErasedPipelineLayout, wgpu::PipelineLayout);
30+
2731
/// A descriptor for a [`Pipeline`].
2832
///
2933
/// Used to store an heterogenous collection of render and compute pipeline descriptors together.
@@ -103,7 +107,7 @@ impl CachedPipelineState {
103107
#[derive(Default)]
104108
struct ShaderData {
105109
pipelines: HashSet<CachedPipelineId>,
106-
processed_shaders: HashMap<Vec<String>, Arc<ShaderModule>>,
110+
processed_shaders: HashMap<Vec<String>, ErasedShaderModule>,
107111
resolved_imports: HashMap<ShaderImport, Handle<Shader>>,
108112
dependents: HashSet<Handle<Shader>>,
109113
}
@@ -124,7 +128,7 @@ impl ShaderCache {
124128
pipeline: CachedPipelineId,
125129
handle: &Handle<Shader>,
126130
shader_defs: &[String],
127-
) -> Result<Arc<ShaderModule>, PipelineCacheError> {
131+
) -> Result<ErasedShaderModule, PipelineCacheError> {
128132
let shader = self
129133
.shaders
130134
.get(handle)
@@ -204,7 +208,7 @@ impl ShaderCache {
204208
return Err(PipelineCacheError::CreateShaderModule(description));
205209
}
206210

207-
entry.insert(Arc::new(shader_module))
211+
entry.insert(ErasedShaderModule::new(shader_module))
208212
}
209213
};
210214

@@ -276,7 +280,7 @@ impl ShaderCache {
276280

277281
#[derive(Default)]
278282
struct LayoutCache {
279-
layouts: HashMap<Vec<BindGroupLayoutId>, wgpu::PipelineLayout>,
283+
layouts: HashMap<Vec<BindGroupLayoutId>, ErasedPipelineLayout>,
280284
}
281285

282286
impl LayoutCache {
@@ -291,10 +295,12 @@ impl LayoutCache {
291295
.iter()
292296
.map(|l| l.value())
293297
.collect::<Vec<_>>();
294-
render_device.create_pipeline_layout(&PipelineLayoutDescriptor {
295-
bind_group_layouts: &bind_group_layouts,
296-
..default()
297-
})
298+
ErasedPipelineLayout::new(render_device.create_pipeline_layout(
299+
&PipelineLayoutDescriptor {
300+
bind_group_layouts: &bind_group_layouts,
301+
..default()
302+
},
303+
))
298304
})
299305
}
300306
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// structs containing wgpu types take a long time to compile. this is particularly bad for generic
2+
// structs containing wgpu structs. we avoid that in debug builds (and for cargo check and rust analyzer)
3+
// by boxing and type-erasing with the `render_resource_wrapper` macro.
4+
// analysis from https://github.com/bevyengine/bevy/pull/5950#issuecomment-1243473071 indicates this is
5+
// due to `evaluate_obligations`. we should check if this can be removed after a fix lands for
6+
// https://github.com/rust-lang/rust/issues/99188 (and after other `evaluate_obligations`-related changes).
7+
#[cfg(debug_assertions)]
#[macro_export]
macro_rules! render_resource_wrapper {
    ($wrapper_type:ident, $wgpu_type:ty) => {
        /// Cheaply clonable, reference-counted handle to a wrapped value.
        ///
        /// In builds with `debug_assertions` the payload type is erased from the struct
        /// definition (it is stored as `Arc<Box<()>>`) to cut compile times. The field is
        /// `None` only after the payload has been taken by `try_unwrap` or by `Drop`.
        #[derive(Clone, Debug)]
        pub struct $wrapper_type(Option<std::sync::Arc<Box<()>>>);

        impl $wrapper_type {
            /// Moves `value` to the heap and wraps it in a shared, type-erased handle.
            pub fn new(value: $wgpu_type) -> Self {
                let typed_box: Box<$wgpu_type> = Box::new(value);
                // SAFETY: the wrapped type is `Sized`, so both boxes are thin, non-null
                // pointers of identical size. The erased box is never dropped or read as
                // `Box<()>`: every access below transmutes it back to the real type first.
                let untyped_box =
                    unsafe { std::mem::transmute::<Box<$wgpu_type>, Box<()>>(typed_box) };
                Self(Some(std::sync::Arc::new(untyped_box)))
            }

            /// Returns the wrapped value if this is the only handle to it, otherwise `None`.
            ///
            /// When other handles exist, this handle is simply released and the value stays
            /// alive for the remaining clones.
            pub fn try_unwrap(mut self) -> Option<$wgpu_type> {
                // Taking the payload leaves `None` behind, so the `Drop` impl that runs when
                // `self` goes out of scope has nothing left to release.
                let inner = self.0.take()?;
                match std::sync::Arc::try_unwrap(inner) {
                    Ok(untyped_box) => {
                        // SAFETY: the box was created from a `Box` of the wrapped type in
                        // `new`, so this restores its original type.
                        let typed_box = unsafe {
                            std::mem::transmute::<Box<()>, Box<$wgpu_type>>(untyped_box)
                        };
                        Some(*typed_box)
                    }
                    Err(inner) => {
                        // SAFETY: same provenance as above. The `Arc` is re-typed before it
                        // is dropped so that, should this turn out to be the last reference
                        // after all, the payload is destroyed with its real type.
                        drop(unsafe {
                            std::mem::transmute::<
                                std::sync::Arc<Box<()>>,
                                std::sync::Arc<Box<$wgpu_type>>,
                            >(inner)
                        });
                        None
                    }
                }
            }
        }

        impl std::ops::Deref for $wrapper_type {
            type Target = $wgpu_type;

            /// Borrows the wrapped value.
            ///
            /// # Panics
            ///
            /// Panics if the inner value has already been taken.
            fn deref(&self) -> &Self::Target {
                let untyped_box = self
                    .0
                    .as_ref()
                    .expect("render_resource_wrapper inner value has already been taken (via drop or try_unwrap)")
                    .as_ref();

                // SAFETY: the referenced box was created from a `Box` of the wrapped type in
                // `new`; only the type of the reference changes, not the pointer.
                let typed_box =
                    unsafe { std::mem::transmute::<&Box<()>, &Box<$wgpu_type>>(untyped_box) };
                typed_box.as_ref()
            }
        }

        impl Drop for $wrapper_type {
            fn drop(&mut self) {
                if let Some(inner) = self.0.take() {
                    // SAFETY: see `new`. Dropping an `Arc<Box<()>>` directly would free the
                    // allocation as a zero-sized value and skip the payload's destructor, so
                    // the real type is restored before the reference is released.
                    drop(unsafe {
                        std::mem::transmute::<
                            std::sync::Arc<Box<()>>,
                            std::sync::Arc<Box<$wgpu_type>>,
                        >(inner)
                    });
                }
            }
        }

        // Arc<Box<()>> and Arc<()> will be Sync and Send even when $wgpu_type is not Sync or Send.
        // We ensure correctness by checking that $wgpu_type does implement Send and Sync.
        // If in future there is a case where a wrapper is required for a non-send/sync type
        // we can implement a macro variant that also does `impl !Send for $wrapper_type {}` and
        // `impl !Sync for $wrapper_type {}`
        const _: () = {
            trait AssertSendSyncBound: Send + Sync {}
            impl AssertSendSyncBound for $wgpu_type {}
        };
    };
}
90+
91+
#[cfg(not(debug_assertions))]
#[macro_export]
macro_rules! render_resource_wrapper {
    ($wrapper_type:ident, $wgpu_type:ty) => {
        /// Cheaply clonable, reference-counted handle to a wrapped value.
        ///
        /// Without `debug_assertions` this is a plain `Arc` around the concrete type.
        #[derive(Clone, Debug)]
        pub struct $wrapper_type(std::sync::Arc<$wgpu_type>);

        impl $wrapper_type {
            /// Wraps `value` in a shared handle.
            pub fn new(value: $wgpu_type) -> Self {
                let shared = std::sync::Arc::new(value);
                Self(shared)
            }

            /// Returns the wrapped value if this is the only handle to it, otherwise `None`.
            pub fn try_unwrap(self) -> Option<$wgpu_type> {
                let Self(shared) = self;
                match std::sync::Arc::try_unwrap(shared) {
                    Ok(value) => Some(value),
                    Err(_still_shared) => None,
                }
            }
        }

        impl std::ops::Deref for $wrapper_type {
            type Target = $wgpu_type;

            /// Borrows the wrapped value.
            fn deref(&self) -> &Self::Target {
                &*self.0
            }
        }

        // Compile-time check that the wrapped type is both `Send` and `Sync`.
        const _: () = {
            trait AssertSendSyncBound: Send + Sync {}
            impl AssertSendSyncBound for $wgpu_type {}
        };
    };
}
122+
123+
pub use render_resource_wrapper;

0 commit comments

Comments
 (0)