Skip to content

Commit 8548770

Browse files
committed
Sprite Batching (#3060)
This implements the following:

* **Sprite Batching**: Collects sprites in a vertex buffer to draw many sprites with a single draw call. Sprites are batched by their `Handle<Image>` within a specific z-level. When possible, sprites are opportunistically batched _across_ z-levels (when no sprites with a different texture exist between two sprites with the same texture on different z-levels). With these changes, I can now get ~130,000 sprites at 60fps on the `bevymark_pipelined` example.
* **Sprite Color Tints**: The `Sprite` type now has a `color` field. Non-white color tints result in a specialized render pipeline that passes the color in as a vertex attribute. I chose to specialize this because passing vertex colors has a measurable price (without colors I get ~130,000 sprites on bevymark, with colors I get ~100,000 sprites). "Colored" sprites cannot be batched with "uncolored" sprites, but I think this is fine because the chance of a "colored" sprite needing to batch with other "colored" sprites is probably much higher than an "uncolored" sprite needing to batch with a "colored" sprite.
* **Sprite Flipping**: Sprites can be flipped on their x or y axis using `Sprite::flip_x` and `Sprite::flip_y`. This is also true for `TextureAtlasSprite`.
* **Simpler BufferVec/UniformVec/DynamicUniformVec Clearing**: Improved the clearing interface by removing the need to know the size of the final buffer at the initial clear.

![image](https://user-images.githubusercontent.com/2694663/140001821-99be0d96-025d-489e-9bfa-ba19c1dc9548.png)

Note that this moves sprites away from entity-driven rendering and back to extracted lists. We _could_ use entities here, but it necessitates that an intermediate list is allocated / populated to collect and sort extracted sprites. This redundant copy, combined with the normal overhead of spawning extracted sprite entities, brings bevymark down to ~80,000 sprites at 60fps. I think making sprites a bit more fixed (by default) is worth it.
I view this as acceptable because batching makes normal entity-driven rendering pretty useless anyway (and we would want to batch most custom materials too). We can still support custom shaders with custom bindings; we'll just need to define a specific interface for it.
1 parent 2f22f5c commit 8548770

File tree

13 files changed

+384
-236
lines changed

13 files changed

+384
-236
lines changed

examples/tools/bevymark_pipelined.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ use bevy::{
44
ecs::prelude::*,
55
input::Input,
66
math::Vec3,
7-
prelude::{App, AssetServer, Handle, MouseButton, Transform},
7+
prelude::{info, App, AssetServer, Handle, MouseButton, Transform},
88
render2::{camera::OrthographicCameraBundle, color::Color, texture::Image},
9-
sprite2::PipelinedSpriteBundle,
9+
sprite2::{PipelinedSpriteBundle, Sprite},
1010
window::WindowDescriptor,
1111
PipelinedDefaultPlugins,
1212
};
13-
use rand::Rng;
13+
use rand::{random, Rng};
1414

1515
const BIRDS_PER_SECOND: u32 = 10000;
1616
const _BASE_COLOR: Color = Color::rgb(5.0, 5.0, 5.0);
@@ -21,6 +21,7 @@ const HALF_BIRD_SIZE: f32 = 256. * BIRD_SCALE * 0.5;
2121

2222
struct BevyCounter {
2323
pub count: u128,
24+
pub color: Color,
2425
}
2526

2627
struct Bird {
@@ -52,7 +53,10 @@ fn main() {
5253
.add_plugin(FrameTimeDiagnosticsPlugin::default())
5354
.add_plugin(LogDiagnosticsPlugin::default())
5455
// .add_plugin(WgpuResourceDiagnosticsPlugin::default())
55-
.insert_resource(BevyCounter { count: 0 })
56+
.insert_resource(BevyCounter {
57+
count: 0,
58+
color: Color::WHITE,
59+
})
5660
// .init_resource::<BirdMaterial>()
5761
.add_startup_system(setup)
5862
.add_system(mouse_handler)
@@ -161,6 +165,9 @@ fn mouse_handler(
161165
// texture: Some(texture_handle),
162166
// });
163167
// }
168+
if mouse_button_input.just_released(MouseButton::Left) {
169+
counter.color = Color::rgb(random(), random(), random());
170+
}
164171

165172
if mouse_button_input.pressed(MouseButton::Left) {
166173
let spawn_count = (BIRDS_PER_SECOND as f64 * time.delta_seconds_f64()) as u128;
@@ -194,6 +201,10 @@ fn spawn_birds(
194201
scale: Vec3::splat(BIRD_SCALE),
195202
..Default::default()
196203
},
204+
sprite: Sprite {
205+
color: counter.color,
206+
..Default::default()
207+
},
197208
..Default::default()
198209
})
199210
.insert(Bird {
@@ -255,7 +266,7 @@ fn counter_system(
255266
counter: Res<BevyCounter>,
256267
) {
257268
if timer.timer.tick(time.delta()).finished() {
258-
println!("counter: {}", counter.count);
269+
info!("counter: {}", counter.count);
259270
}
260271
}
261272

pipelined/bevy_core_pipeline/src/lib.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ pub use main_pass_3d::*;
77
pub use main_pass_driver::*;
88

99
use bevy_app::{App, Plugin};
10-
use bevy_asset::Handle;
1110
use bevy_core::FloatOrd;
1211
use bevy_ecs::{
1312
prelude::*,
@@ -23,7 +22,7 @@ use bevy_render2::{
2322
},
2423
render_resource::*,
2524
renderer::RenderDevice,
26-
texture::{Image, TextureCache},
25+
texture::TextureCache,
2726
view::{ExtractedView, Msaa, ViewDepthTexture},
2827
RenderApp, RenderStage, RenderWorld,
2928
};
@@ -131,18 +130,18 @@ impl Plugin for CorePipelinePlugin {
131130
}
132131

133132
pub struct Transparent2d {
134-
pub sort_key: Handle<Image>,
133+
pub sort_key: FloatOrd,
135134
pub entity: Entity,
136135
pub pipeline: CachedPipelineId,
137136
pub draw_function: DrawFunctionId,
138137
}
139138

140139
impl PhaseItem for Transparent2d {
141-
type SortKey = Handle<Image>;
140+
type SortKey = FloatOrd;
142141

143142
#[inline]
144143
fn sort_key(&self) -> Self::SortKey {
145-
self.sort_key.clone_weak()
144+
self.sort_key
146145
}
147146

148147
#[inline]

pipelined/bevy_pbr2/src/render/light.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -383,10 +383,7 @@ pub fn prepare_lights(
383383
point_lights: Query<&ExtractedPointLight>,
384384
directional_lights: Query<&ExtractedDirectionalLight>,
385385
) {
386-
// PERF: view.iter().count() could be views.iter().len() if we implemented ExactSizeIterator for archetype-only filters
387-
light_meta
388-
.view_gpu_lights
389-
.reserve_and_clear(views.iter().count(), &render_device);
386+
light_meta.view_gpu_lights.clear();
390387

391388
let ambient_color = ambient_light.color.as_rgba_linear() * ambient_light.brightness;
392389
// set up light data for each view
@@ -605,7 +602,9 @@ pub fn prepare_lights(
605602
});
606603
}
607604

608-
light_meta.view_gpu_lights.write_buffer(&render_queue);
605+
light_meta
606+
.view_gpu_lights
607+
.write_buffer(&render_device, &render_queue);
609608
}
610609

611610
pub fn queue_shadow_view_bind_group(

pipelined/bevy_render2/src/color/colorspace.rs

+4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub trait SrgbColorSpace {
55

66
// source: https://entropymine.com/imageworsener/srgbformula/
77
impl SrgbColorSpace for f32 {
8+
#[inline]
89
fn linear_to_nonlinear_srgb(self) -> f32 {
910
if self <= 0.0 {
1011
return self;
@@ -17,6 +18,7 @@ impl SrgbColorSpace for f32 {
1718
}
1819
}
1920

21+
#[inline]
2022
fn nonlinear_to_linear_srgb(self) -> f32 {
2123
if self <= 0.0 {
2224
return self;
@@ -32,6 +34,7 @@ impl SrgbColorSpace for f32 {
3234
pub struct HslRepresentation;
3335
impl HslRepresentation {
3436
/// converts a color in HSL space to sRGB space
37+
#[inline]
3538
pub fn hsl_to_nonlinear_srgb(hue: f32, saturation: f32, lightness: f32) -> [f32; 3] {
3639
// https://en.wikipedia.org/wiki/HSL_and_HSV#HSL_to_RGB
3740
let chroma = (1.0 - (2.0 * lightness - 1.0).abs()) * saturation;
@@ -60,6 +63,7 @@ impl HslRepresentation {
6063
}
6164

6265
/// converts a color in sRGB space to HSL space
66+
#[inline]
6367
pub fn nonlinear_srgb_to_hsl([red, green, blue]: [f32; 3]) -> (f32, f32, f32) {
6468
// https://en.wikipedia.org/wiki/HSL_and_HSV#From_RGB
6569
let x_max = red.max(green.max(blue));

pipelined/bevy_render2/src/color/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ impl Color {
416416
}
417417

418418
/// Converts a `Color` to a `[f32; 4]` from linear RGB colorspace
419+
#[inline]
419420
pub fn as_linear_rgba_f32(self: Color) -> [f32; 4] {
420421
match self {
421422
Color::Rgba {

pipelined/bevy_render2/src/render_component.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,7 @@ fn prepare_uniform_components<C: Component>(
9292
) where
9393
C: AsStd140 + Clone,
9494
{
95-
let len = components.iter().len();
96-
component_uniforms
97-
.uniforms
98-
.reserve_and_clear(len, &render_device);
95+
component_uniforms.uniforms.clear();
9996
for (entity, component) in components.iter() {
10097
commands
10198
.get_or_spawn(entity)
@@ -105,7 +102,9 @@ fn prepare_uniform_components<C: Component>(
105102
});
106103
}
107104

108-
component_uniforms.uniforms.write_buffer(&render_queue);
105+
component_uniforms
106+
.uniforms
107+
.write_buffer(&render_device, &render_queue);
109108
}
110109

111110
pub struct ExtractComponentPlugin<C, F = ()>(PhantomData<fn() -> (C, F)>);

pipelined/bevy_render2/src/render_resource/buffer_vec.rs

+18-16
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,20 @@ impl<T: Pod> BufferVec<T> {
4343
self.capacity
4444
}
4545

46+
#[inline]
47+
pub fn len(&self) -> usize {
48+
self.values.len()
49+
}
50+
51+
#[inline]
52+
pub fn is_empty(&self) -> bool {
53+
self.values.is_empty()
54+
}
55+
4656
pub fn push(&mut self, value: T) -> usize {
47-
let len = self.values.len();
48-
if len < self.capacity {
49-
self.values.push(value);
50-
len
51-
} else {
52-
panic!(
53-
"Cannot push value because capacity of {} has been reached",
54-
self.capacity
55-
);
56-
}
57+
let index = self.values.len();
58+
self.values.push(value);
59+
index
5760
}
5861

5962
pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
@@ -69,12 +72,11 @@ impl<T: Pod> BufferVec<T> {
6972
}
7073
}
7174

72-
pub fn reserve_and_clear(&mut self, capacity: usize, device: &RenderDevice) {
73-
self.clear();
74-
self.reserve(capacity, device);
75-
}
76-
77-
pub fn write_buffer(&mut self, queue: &RenderQueue) {
75+
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
76+
if self.values.is_empty() {
77+
return;
78+
}
79+
self.reserve(self.values.len(), device);
7880
if let Some(buffer) = &self.buffer {
7981
let range = 0..self.item_size * self.values.len();
8082
let bytes: &[u8] = cast_slice(&self.values);

pipelined/bevy_render2/src/render_resource/uniform_vec.rs

+14-24
Original file line numberDiff line numberDiff line change
@@ -58,19 +58,12 @@ impl<T: AsStd140> UniformVec<T> {
5858
}
5959

6060
pub fn push(&mut self, value: T) -> usize {
61-
let len = self.values.len();
62-
if len < self.capacity {
63-
self.values.push(value);
64-
len
65-
} else {
66-
panic!(
67-
"Cannot push value because capacity of {} has been reached",
68-
self.capacity
69-
);
70-
}
61+
let index = self.values.len();
62+
self.values.push(value);
63+
index
7164
}
7265

73-
pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) {
66+
pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) -> bool {
7467
if capacity > self.capacity {
7568
self.capacity = capacity;
7669
let size = self.item_size * capacity;
@@ -81,15 +74,17 @@ impl<T: AsStd140> UniformVec<T> {
8174
usage: BufferUsages::COPY_DST | BufferUsages::UNIFORM,
8275
mapped_at_creation: false,
8376
}));
77+
true
78+
} else {
79+
false
8480
}
8581
}
8682

87-
pub fn reserve_and_clear(&mut self, capacity: usize, device: &RenderDevice) {
88-
self.clear();
89-
self.reserve(capacity, device);
90-
}
91-
92-
pub fn write_buffer(&mut self, queue: &RenderQueue) {
83+
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
84+
if self.values.is_empty() {
85+
return;
86+
}
87+
self.reserve(self.values.len(), device);
9388
if let Some(uniform_buffer) = &self.uniform_buffer {
9489
let range = 0..self.item_size * self.values.len();
9590
let mut writer = std140::Writer::new(&mut self.scratch[range.clone()]);
@@ -152,13 +147,8 @@ impl<T: AsStd140> DynamicUniformVec<T> {
152147
}
153148

154149
#[inline]
155-
pub fn reserve_and_clear(&mut self, capacity: usize, device: &RenderDevice) {
156-
self.uniform_vec.reserve_and_clear(capacity, device);
157-
}
158-
159-
#[inline]
160-
pub fn write_buffer(&mut self, queue: &RenderQueue) {
161-
self.uniform_vec.write_buffer(queue);
150+
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
151+
self.uniform_vec.write_buffer(device, queue);
162152
}
163153

164154
#[inline]

pipelined/bevy_render2/src/view/mod.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,9 @@ fn prepare_view_uniforms(
9090
render_device: Res<RenderDevice>,
9191
render_queue: Res<RenderQueue>,
9292
mut view_uniforms: ResMut<ViewUniforms>,
93-
mut views: Query<(Entity, &ExtractedView)>,
93+
views: Query<(Entity, &ExtractedView)>,
9494
) {
95-
view_uniforms
96-
.uniforms
97-
.reserve_and_clear(views.iter_mut().len(), &render_device);
95+
view_uniforms.uniforms.clear();
9896
for (entity, camera) in views.iter() {
9997
let projection = camera.projection;
10098
let view_uniforms = ViewUniformOffset {
@@ -108,7 +106,9 @@ fn prepare_view_uniforms(
108106
commands.entity(entity).insert(view_uniforms);
109107
}
110108

111-
view_uniforms.uniforms.write_buffer(&render_queue);
109+
view_uniforms
110+
.uniforms
111+
.write_buffer(&render_device, &render_queue);
112112
}
113113

114114
fn prepare_view_targets(

pipelined/bevy_sprite2/src/lib.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@ use bevy_app::prelude::*;
1818
use bevy_asset::{AddAsset, Assets, HandleUntyped};
1919
use bevy_core_pipeline::Transparent2d;
2020
use bevy_reflect::TypeUuid;
21-
use bevy_render2::{render_phase::DrawFunctions, render_resource::Shader, RenderApp, RenderStage};
21+
use bevy_render2::{
22+
render_phase::DrawFunctions,
23+
render_resource::{Shader, SpecializedPipelines},
24+
RenderApp, RenderStage,
25+
};
2226

2327
#[derive(Default)]
2428
pub struct SpritePlugin;
@@ -36,8 +40,9 @@ impl Plugin for SpritePlugin {
3640
render_app
3741
.init_resource::<ImageBindGroups>()
3842
.init_resource::<SpritePipeline>()
43+
.init_resource::<SpecializedPipelines<SpritePipeline>>()
3944
.init_resource::<SpriteMeta>()
40-
.add_system_to_stage(RenderStage::Extract, render::extract_atlases)
45+
.init_resource::<ExtractedSprites>()
4146
.add_system_to_stage(RenderStage::Extract, render::extract_sprites)
4247
.add_system_to_stage(RenderStage::Prepare, render::prepare_sprites)
4348
.add_system_to_stage(RenderStage::Queue, queue_sprites);

0 commit comments

Comments
 (0)