Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
0622940
[spv-out] Support for mesh shaders
cwfitzgerald Dec 12, 2025
dc69d86
Update mesh_shader.rs
inner-daemons Dec 13, 2025
dfd360f
Tried one thing
inner-daemons Dec 14, 2025
c1973fe
Updated snapshots
inner-daemons Dec 14, 2025
50c2aa9
Tried another thing
inner-daemons Dec 14, 2025
7749867
Removed per primitive stuff + cull primitive
inner-daemons Dec 14, 2025
ca3f93a
Testing new thing
inner-daemons Dec 14, 2025
4fbfc60
Ahh well I think I'm done for the night
inner-daemons Dec 14, 2025
7551f6a
Slight improvements
inner-daemons Dec 14, 2025
3225030
Fixed another comment
inner-daemons Dec 14, 2025
c34c474
Added note on feature
inner-daemons Dec 14, 2025
18164ee
Preparing for merge as is
inner-daemons Dec 14, 2025
0b9bf09
Ok I'm tired
inner-daemons Dec 14, 2025
da72786
Blah blah blah
inner-daemons Dec 14, 2025
8862d56
Updated loop logic
inner-daemons Dec 14, 2025
0286af7
Tried something else
inner-daemons Dec 14, 2025
7cc51c9
Tried another little fix
inner-daemons Dec 14, 2025
3818efe
Tried something new
inner-daemons Dec 14, 2025
ea50cb0
Told it to skip instead of expect a failure
inner-daemons Dec 14, 2025
8d9a451
Redocumented feature, made tests run on AMD
inner-daemons Dec 15, 2025
3edc37c
Removed obseleted files, updated changelog, updated shaders
inner-daemons Dec 15, 2025
6ffd2d5
Added task shader to the changelog entry
inner-daemons Dec 15, 2025
094a5ac
Enabled debugigng
inner-daemons Dec 15, 2025
6417327
Fixed typo
inner-daemons Dec 15, 2025
2e863e2
Trying with better aligned task payload stuff
inner-daemons Dec 15, 2025
05eada6
Made the tests actually run on LLVMPIPE
inner-daemons Dec 15, 2025
6dc1fd0
Testing on LLVMPIPE if removing task payload reads does anything
inner-daemons Dec 15, 2025
883a443
Undid test that didnt work
inner-daemons Dec 15, 2025
3457ed5
Tried making it write a barrier
inner-daemons Dec 15, 2025
cad1bdf
Wrote another barrier I guess
inner-daemons Dec 15, 2025
4927043
Gonna see if this one does anything
inner-daemons Dec 15, 2025
19af909
Jeez im stupid
inner-daemons Dec 15, 2025
3d7327f
Removed debugging files
inner-daemons Dec 15, 2025
c4574e7
Fixed the example shader sorta
inner-daemons Dec 15, 2025
ff46752
Blah blah blah
inner-daemons Dec 15, 2025
a5324ee
Merge remote-tracking branch 'upstream/trunk' into mesh-shading/spv-w…
inner-daemons Dec 15, 2025
c36f9f8
Final cleanup
inner-daemons Dec 15, 2025
4c29d13
Added new mesh shader tasks
inner-daemons Dec 16, 2025
c08d00a
Fixed test
inner-daemons Dec 16, 2025
7703aef
Fixed some test shenanigans
inner-daemons Dec 16, 2025
90624c8
Seeing if this breaks anything
inner-daemons Dec 16, 2025
307f908
Tried to fix one issue
inner-daemons Dec 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,71 @@ Bottom level categories:

### Major Changes

#### Added support for mesh shaders

This has been a long time coming. See [the tracking issue](https://github.com/gfx-rs/wgpu/issues/7197) for more information.
They are now fully supported on Vulkan, and supported on Metal and DX12 with passthrough shaders. WGSL parsing and rewriting
is supported, meaning they can be used through WESL or naga_oil.

Mesh shader pipelines replace standard vertex shader pipelines and allow new ways to render meshes. They form the core of
some rendering engines, including Unreal Engine's nanite. This is because they are ideal for meshlet rendering, a form
of rendering where small groups of triangles are handled together, for culling and for rendering.

The core idea is that compute-like shaders will generate primitives directly that will then be passed to the rasterizer, rather
than having a list of vertices generated individually and then using a static index buffer. This means that certain computations
on nearby groups of triangles can be done together, the relationship between vertices and primitives is more programmable, and
you can even pass non-interpolated per-primitive data to the fragment shader, independent of vertices.

Mesh shaders are very versatile, and are powerful enough to replace vertex shaders, tesselation shaders, and geometry shaders
on their own or with task shaders.

A full example of mesh shaders in use can be seen in the `mesh_shader` example. Below is a small snippet of shader code
demonstrating their usage:
```wgsl
@task
@payload(taskPayload)
@workgroup_size(1)
fn ts_main() -> @builtin(mesh_task_size) vec3<u32> {
// Task shaders can use workgroup variables like compute shaders
workgroupData = 1.0;
// Pass some data to all mesh shaders dispatched by this workgroup
taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0);
taskPayload.visible = 1;
// Dispatch a mesh shader grid with one workgroup
return vec3(1, 1, 1);
}

@mesh(mesh_output)
@payload(taskPayload)
@workgroup_size(1)
fn ms_main(@builtin(local_invocation_index) index: u32, @builtin(global_invocation_id) id: vec3<u32>) {
// Set how many outputs this workgroup will generate
mesh_output.vertex_count = 3;
mesh_output.primitive_count = 1;
// Can also use workgroup variables
workgroupData = 2.0;

// Set vertex outputs
mesh_output.vertices[0].position = positions[0];
mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask;

mesh_output.vertices[1].position = positions[1];
mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask;

mesh_output.vertices[2].position = positions[2];
mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask;

// Set the vertex indices for the only primitive
mesh_output.primitives[0].indices = vec3<u32>(0, 1, 2);
// Cull it if the data passed by the task shader says to
mesh_output.primitives[0].cull = taskPayload.visible == 1;
// Give a noninterpolated per-primitive vec4 to the fragment shader
mesh_output.primitives[0].colorMask = vec4<f32>(1.0, 0.0, 1.0, 1.0);
}
```

See other changes in this changelog for more information.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Credit yourself lmao


#### Switch from `gpu-alloc` to `gpu-allocator` in the `vulkan` backend

`gpu-allocator` is the allocator used in the `dx12` backend, allowing to configure
Expand Down Expand Up @@ -251,6 +316,7 @@ By @cwfitzgerald in [#8609](https://github.com/gfx-rs/wgpu/pull/8609).
#### Vulkan

- Fixed a validation error regarding atomic memory semantics. By @atlv24 in [#8391](https://github.com/gfx-rs/wgpu/pull/8391).
- Add mesh shader writer support, allowing WGSL shaders to be used on the vulkan backend. Only works on NVIDIA and Intel GPUs. By @inner-daemons in [#8456](https://github.com/gfx-rs/wgpu/pull/8456).

#### Metal
- Fixed a variety of feature detection related bugs. By @inner-daemons in [#8439](https://github.com/gfx-rs/wgpu/pull/8439).
Expand Down
3 changes: 2 additions & 1 deletion docs/api-specs/mesh_shading.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,13 @@ struct MeshOutput {
@builtin(vertex_count) vertex_count: u32,
@builtin(primitive_count) primitive_count: u32,
}

var<workgroup> mesh_output: MeshOutput;

@mesh(mesh_output)
@payload(taskPayload)
@workgroup_size(1)
fn ms_main(@builtin(local_invocation_index) index: u32, @builtin(global_invocation_id) id: vec3<u32>) {
fn ms_main() {
mesh_output.vertex_count = 3;
mesh_output.primitive_count = 1;
workgroupData = 2.0;
Expand Down
70 changes: 33 additions & 37 deletions examples/features/src/mesh_shader/mod.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,9 @@
use std::process::Stdio;

// Same as in mesh shader tests
fn compile_glsl(device: &wgpu::Device, shader_stage: &'static str) -> wgpu::ShaderModule {
let cmd = std::process::Command::new("glslc")
.args([
&format!(
"{}/src/mesh_shader/shader.{shader_stage}",
env!("CARGO_MANIFEST_DIR")
),
"-o",
"-",
"--target-env=vulkan1.2",
"--target-spv=spv1.4",
])
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to call glslc");
let output = cmd.wait_with_output().expect("Error waiting for glslc");
assert!(output.status.success());
unsafe {
device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough {
entry_point: "main".into(),
label: None,
spirv: Some(wgpu::util::make_spirv_raw(&output.stdout)),
..Default::default()
})
}
fn compile_wgsl(device: &wgpu::Device) -> wgpu::ShaderModule {
device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: None,
source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()),
})
}
fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule {
let out_path = format!(
Expand Down Expand Up @@ -83,21 +60,30 @@ impl crate::framework::Example for Example {
device: &wgpu::Device,
_queue: &wgpu::Queue,
) -> Self {
let (ts, ms, fs) = match adapter.get_info().backend {
let (ts, ms, fs, ts_name, ms_name, fs_name) = match adapter.get_info().backend {
wgpu::Backend::Vulkan => (
compile_glsl(device, "task"),
compile_glsl(device, "mesh"),
compile_glsl(device, "frag"),
compile_wgsl(device),
compile_wgsl(device),
compile_wgsl(device),
"ts_main",
"ms_main",
"fs_main",
),
wgpu::Backend::Dx12 => (
compile_hlsl(device, "Task", "as"),
compile_hlsl(device, "Mesh", "ms"),
compile_hlsl(device, "Frag", "ps"),
"main",
"main",
"main",
),
wgpu::Backend::Metal => (
compile_msl(device, "taskShader"),
compile_msl(device, "meshShader"),
compile_msl(device, "fragShader"),
"main",
"main",
"main",
),
_ => panic!("Example can currently only run on vulkan, dx12 or metal"),
};
Expand All @@ -111,17 +97,17 @@ impl crate::framework::Example for Example {
layout: Some(&pipeline_layout),
task: Some(wgpu::TaskState {
module: &ts,
entry_point: Some("main"),
entry_point: Some(ts_name),
compilation_options: Default::default(),
}),
mesh: wgpu::MeshState {
module: &ms,
entry_point: Some("main"),
entry_point: Some(ms_name),
compilation_options: Default::default(),
},
fragment: Some(wgpu::FragmentState {
module: &fs,
entry_point: Some("main"),
entry_point: Some(fs_name),
compilation_options: Default::default(),
targets: &[Some(config.view_formats[0].into())],
}),
Expand Down Expand Up @@ -208,7 +194,17 @@ pub static TEST: crate::framework::ExampleTestParams = crate::framework::Example
wgpu::Features::EXPERIMENTAL_MESH_SHADER
| wgpu::Features::EXPERIMENTAL_PASSTHROUGH_SHADERS,
)
.limits(wgpu::Limits::defaults().using_recommended_minimum_mesh_shader_values()),
comparisons: &[wgpu_test::ComparisonType::Mean(0.01)],
.instance_flags(wgpu::InstanceFlags::advanced_debugging())
.limits(wgpu::Limits::defaults().using_recommended_minimum_mesh_shader_values())
.skip(wgpu_test::FailureCase {
backends: None,
// Skip Mesa because LLVMPIPE has what is believed to be a driver bug
vendor: Some(0x10005),
adapter: None,
driver: None,
reasons: vec![],
behavior: wgpu_test::FailureBehavior::Ignore,
}),
comparisons: &[wgpu_test::ComparisonType::Mean(0.005)],
_phantom: std::marker::PhantomData::<Example>,
};
11 changes: 0 additions & 11 deletions examples/features/src/mesh_shader/shader.frag

This file was deleted.

38 changes: 0 additions & 38 deletions examples/features/src/mesh_shader/shader.mesh

This file was deleted.

16 changes: 0 additions & 16 deletions examples/features/src/mesh_shader/shader.task

This file was deleted.

78 changes: 78 additions & 0 deletions examples/features/src/mesh_shader/shader.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
enable wgpu_mesh_shader;

const positions = array(
vec4(0., 1., 0., 1.),
vec4(-1., -1., 0., 1.),
vec4(1., -1., 0., 1.)
);
const colors = array(
vec4(0., 1., 0., 1.),
vec4(0., 0., 1., 1.),
vec4(1., 0., 0., 1.)
);

struct TaskPayload {
colorMask: vec4<f32>,
visible: bool,
}
struct VertexOutput {
@builtin(position) position: vec4<f32>,
@location(0) color: vec4<f32>,
}
struct PrimitiveOutput {
@builtin(triangle_indices) indices: vec3<u32>,
@builtin(cull_primitive) cull: bool,
@per_primitive @location(1) colorMask: vec4<f32>,
}
struct PrimitiveInput {
@per_primitive @location(1) colorMask: vec4<f32>,
}

var<task_payload> taskPayload: TaskPayload;
var<workgroup> workgroupData: f32;

@task
@payload(taskPayload)
@workgroup_size(1)
fn ts_main() -> @builtin(mesh_task_size) vec3<u32> {
workgroupData = 1.0;
taskPayload.colorMask = vec4(1.0, 1.0, 0.0, 1.0);
taskPayload.visible = true;
return vec3(1, 1, 1);
}

struct MeshOutput {
@builtin(vertices) vertices: array<VertexOutput, 3>,
@builtin(primitives) primitives: array<PrimitiveOutput, 1>,
@builtin(vertex_count) vertex_count: u32,
@builtin(primitive_count) primitive_count: u32,
}

var<workgroup> mesh_output: MeshOutput;

@mesh(mesh_output)
@payload(taskPayload)
@workgroup_size(1)
fn ms_main() {
mesh_output.vertex_count = 3;
mesh_output.primitive_count = 1;
workgroupData = 2.0;

mesh_output.vertices[0].position = positions[0];
mesh_output.vertices[0].color = colors[0] * taskPayload.colorMask;

mesh_output.vertices[1].position = positions[1];
mesh_output.vertices[1].color = colors[1] * taskPayload.colorMask;

mesh_output.vertices[2].position = positions[2];
mesh_output.vertices[2].color = colors[2] * taskPayload.colorMask;

mesh_output.primitives[0].indices = vec3<u32>(0, 1, 2);
mesh_output.primitives[0].cull = !taskPayload.visible;
mesh_output.primitives[0].colorMask = vec4<f32>(1.0, 0.0, 1.0, 1.0);
}

@fragment
fn fs_main(vertex: VertexOutput, primitive: PrimitiveInput) -> @location(0) vec4<f32> {
return vertex.color * primitive.colorMask;
}
Loading