Skip to content

Commit

Permalink
invalidate the device when we encounter driver-induced device loss or on unexpected errors
Browse files Browse the repository at this point in the history
  • Loading branch information
teoxoy committed Sep 9, 2024
1 parent ce6a46e commit eb47449
Show file tree
Hide file tree
Showing 14 changed files with 176 additions and 186 deletions.
4 changes: 2 additions & 2 deletions wgpu-core/src/command/clear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ impl Global {

// actual hal barrier & operation
let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
unsafe {
cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset);
Expand Down Expand Up @@ -249,7 +249,7 @@ impl Global {

let device = &cmd_buf.device;
device.check_is_valid()?;
let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker()?;
let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker(&cmd_buf.device)?;

let snatch_guard = device.snatchable_lock.read();
clear_texture(
Expand Down
12 changes: 7 additions & 5 deletions wgpu-core/src/command/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,10 @@ impl Global {
// We automatically keep extending command buffers over time, and because
// we want to insert a command buffer _before_ what we're about to record,
// we need to make sure to close the previous one.
encoder.close().map_pass_err(pass_scope)?;
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
// will be reset to true if recording is done without errors
*status = CommandEncoderStatus::Error;
let raw_encoder = encoder.open().map_pass_err(pass_scope)?;
let raw_encoder = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;

let mut state = State {
binder: Binder::new(),
Expand Down Expand Up @@ -617,12 +617,12 @@ impl Global {
} = state;

// Stop the current command buffer.
encoder.close().map_pass_err(pass_scope)?;
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;

// Create a new command buffer, which we will insert _before_ the body of the compute pass.
//
// Use that buffer to insert barriers and clear discarded images.
let transit = encoder.open().map_pass_err(pass_scope)?;
let transit = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;
fixup_discarded_surfaces(
pending_discard_init_fixups.into_iter(),
transit,
Expand All @@ -637,7 +637,9 @@ impl Global {
&snatch_guard,
);
// Close the command buffer, and swap it with the previous.
encoder.close_and_swap().map_pass_err(pass_scope)?;
encoder
.close_and_swap(&cmd_buf.device)
.map_pass_err(pass_scope)?;

Ok(())
}
Expand Down
32 changes: 19 additions & 13 deletions wgpu-core/src/command/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ impl CommandEncoder {
/// [l]: CommandEncoder::list
/// [`transition_buffers`]: hal::CommandEncoder::transition_buffers
/// [`transition_textures`]: hal::CommandEncoder::transition_textures
fn close_and_swap(&mut self) -> Result<(), DeviceError> {
fn close_and_swap(&mut self, device: &Device) -> Result<(), DeviceError> {
if self.is_open {
self.is_open = false;
let new = unsafe { self.raw.end_encoding()? };
let new = unsafe { self.raw.end_encoding() }.map_err(|e| device.handle_hal_error(e))?;
self.list.insert(self.list.len() - 1, new);
}

Expand All @@ -192,10 +192,11 @@ impl CommandEncoder {
/// On return, the underlying hal encoder is closed.
///
/// [l]: CommandEncoder::list
fn close(&mut self) -> Result<(), DeviceError> {
fn close(&mut self, device: &Device) -> Result<(), DeviceError> {
if self.is_open {
self.is_open = false;
let cmd_buf = unsafe { self.raw.end_encoding()? };
let cmd_buf =
unsafe { self.raw.end_encoding() }.map_err(|e| device.handle_hal_error(e))?;
self.list.push(cmd_buf);
}

Expand All @@ -215,11 +216,15 @@ impl CommandEncoder {
/// Begin recording a new command buffer, if we haven't already.
///
/// The underlying hal encoder is put in the "recording" state.
pub(crate) fn open(&mut self) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> {
pub(crate) fn open(
&mut self,
device: &Device,
) -> Result<&mut dyn hal::DynCommandEncoder, DeviceError> {
if !self.is_open {
self.is_open = true;
let hal_label = self.hal_label.as_deref();
unsafe { self.raw.begin_encoding(hal_label)? };
unsafe { self.raw.begin_encoding(hal_label) }
.map_err(|e| device.handle_hal_error(e))?;
}

Ok(self.raw.as_mut())
Expand All @@ -229,9 +234,9 @@ impl CommandEncoder {
/// its own label.
///
/// The underlying hal encoder is put in the "recording" state.
fn open_pass(&mut self, hal_label: Option<&str>) -> Result<(), DeviceError> {
fn open_pass(&mut self, hal_label: Option<&str>, device: &Device) -> Result<(), DeviceError> {
self.is_open = true;
unsafe { self.raw.begin_encoding(hal_label)? };
unsafe { self.raw.begin_encoding(hal_label) }.map_err(|e| device.handle_hal_error(e))?;

Ok(())
}
Expand Down Expand Up @@ -276,8 +281,9 @@ pub struct CommandBufferMutable {
impl CommandBufferMutable {
pub(crate) fn open_encoder_and_tracker(
&mut self,
device: &Device,
) -> Result<(&mut dyn hal::DynCommandEncoder, &mut Tracker), DeviceError> {
let encoder = self.encoder.open()?;
let encoder = self.encoder.open(device)?;
let tracker = &mut self.trackers;

Ok((encoder, tracker))
Expand Down Expand Up @@ -621,7 +627,7 @@ impl Global {
let cmd_buf_data = cmd_buf_data.as_mut().unwrap();
match cmd_buf_data.status {
CommandEncoderStatus::Recording => {
if let Err(e) = cmd_buf_data.encoder.close() {
if let Err(e) = cmd_buf_data.encoder.close(&cmd_buf.device) {
Some(e.into())
} else {
cmd_buf_data.status = CommandEncoderStatus::Finished;
Expand Down Expand Up @@ -671,7 +677,7 @@ impl Global {
list.push(TraceCommand::PushDebugGroup(label.to_string()));
}

let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
if !self
.instance
.flags
Expand Down Expand Up @@ -713,7 +719,7 @@ impl Global {
.flags
.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS)
{
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
unsafe {
cmd_buf_raw.insert_debug_marker(label);
}
Expand Down Expand Up @@ -744,7 +750,7 @@ impl Global {
list.push(TraceCommand::PopDebugGroup);
}

let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
if !self
.instance
.flags
Expand Down
4 changes: 2 additions & 2 deletions wgpu-core/src/command/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ impl Global {
let encoder = &mut cmd_buf_data.encoder;
let tracker = &mut cmd_buf_data.trackers;

let raw_encoder = encoder.open()?;
let raw_encoder = encoder.open(&cmd_buf.device)?;

let query_set = hub
.query_sets
Expand Down Expand Up @@ -397,7 +397,7 @@ impl Global {
let encoder = &mut cmd_buf_data.encoder;
let tracker = &mut cmd_buf_data.trackers;
let buffer_memory_init_actions = &mut cmd_buf_data.buffer_memory_init_actions;
let raw_encoder = encoder.open()?;
let raw_encoder = encoder.open(&cmd_buf.device)?;

if destination_offset % wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT != 0 {
return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment));
Expand Down
14 changes: 9 additions & 5 deletions wgpu-core/src/command/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1588,10 +1588,12 @@ impl Global {
// We automatically keep extending command buffers over time, and because
// we want to insert a command buffer _before_ what we're about to record,
// we need to make sure to close the previous one.
encoder.close().map_pass_err(pass_scope)?;
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
// We will reset this to `Recording` if we succeed, acts as a fail-safe.
*status = CommandEncoderStatus::Error;
encoder.open_pass(hal_label).map_pass_err(pass_scope)?;
encoder
.open_pass(hal_label, &cmd_buf.device)
.map_pass_err(pass_scope)?;

let info = RenderPassInfo::start(
device,
Expand Down Expand Up @@ -1894,7 +1896,7 @@ impl Global {
.finish(state.raw_encoder, state.snatch_guard)
.map_pass_err(pass_scope)?;

encoder.close().map_pass_err(pass_scope)?;
encoder.close(&cmd_buf.device).map_pass_err(pass_scope)?;
(trackers, pending_discard_init_fixups)
};

Expand All @@ -1906,7 +1908,7 @@ impl Global {
let tracker = &mut cmd_buf_data.trackers;

{
let transit = encoder.open().map_pass_err(pass_scope)?;
let transit = encoder.open(&cmd_buf.device).map_pass_err(pass_scope)?;

fixup_discarded_surfaces(
pending_discard_init_fixups.into_iter(),
Expand All @@ -1922,7 +1924,9 @@ impl Global {
}

*status = CommandEncoderStatus::Recording;
encoder.close_and_swap().map_pass_err(pass_scope)?;
encoder
.close_and_swap(&cmd_buf.device)
.map_pass_err(pass_scope)?;

Ok(())
}
Expand Down
10 changes: 5 additions & 5 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ fn handle_texture_init(

// In rare cases we may need to insert an init operation immediately onto the command buffer.
if !immediate_inits.is_empty() {
let cmd_buf_raw = encoder.open()?;
let cmd_buf_raw = encoder.open(device)?;
for init in immediate_inits {
clear_texture(
&init.texture,
Expand Down Expand Up @@ -684,7 +684,7 @@ impl Global {
dst_offset: destination_offset,
size: wgt::BufferSize::new(size).unwrap(),
};
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
let barriers = src_barrier
.into_iter()
.chain(dst_barrier)
Expand Down Expand Up @@ -855,7 +855,7 @@ impl Global {
})
.collect::<Vec<_>>();

let cmd_buf_raw = encoder.open()?;
let cmd_buf_raw = encoder.open(&cmd_buf.device)?;
unsafe {
cmd_buf_raw.transition_textures(&dst_barrier);
cmd_buf_raw.transition_buffers(src_barrier.as_slice());
Expand Down Expand Up @@ -1030,7 +1030,7 @@ impl Global {
}
})
.collect::<Vec<_>>();
let cmd_buf_raw = encoder.open()?;
let cmd_buf_raw = encoder.open(&cmd_buf.device)?;
unsafe {
cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
cmd_buf_raw.transition_textures(&src_barrier);
Expand Down Expand Up @@ -1209,7 +1209,7 @@ impl Global {
}
})
.collect::<Vec<_>>();
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
let cmd_buf_raw = cmd_buf_data.encoder.open(&cmd_buf.device)?;
unsafe {
cmd_buf_raw.transition_textures(&barriers);
cmd_buf_raw.copy_texture_to_texture(
Expand Down
38 changes: 18 additions & 20 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,21 +270,27 @@ impl Global {

let snatch_guard = device.snatchable_lock.read();
let raw_buf = buffer.try_raw(&snatch_guard)?;
unsafe {
let mapping = device

let mapping = unsafe {
device
.raw()
.map_buffer(raw_buf, offset..offset + data.len() as u64)
.map_err(DeviceError::from)?;
std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len());
if !mapping.is_coherent {
#[allow(clippy::single_range_in_vec_init)]
}
.map_err(|e| device.handle_hal_error(e))?;

unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()) };

if !mapping.is_coherent {
#[allow(clippy::single_range_in_vec_init)]
unsafe {
device
.raw()
.flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64]);
}
device.raw().unmap_buffer(raw_buf);
.flush_mapped_ranges(raw_buf, &[offset..offset + data.len() as u64])
};
}

unsafe { device.raw().unmap_buffer(raw_buf) };

Ok(())
}

Expand Down Expand Up @@ -2006,7 +2012,9 @@ impl Global {
hal::SurfaceError::Outdated | hal::SurfaceError::Lost => {
E::InvalidSurface
}
hal::SurfaceError::Device(error) => E::Device(error.into()),
hal::SurfaceError::Device(error) => {
E::Device(device.handle_hal_error(error))
}
hal::SurfaceError::Other(message) => {
log::error!("surface configuration failed: {}", message);
E::InvalidSurface
Expand Down Expand Up @@ -2289,16 +2297,6 @@ impl Global {
}
}

pub fn device_mark_lost(&self, device_id: DeviceId, message: &str) {
api_log!("Device::mark_lost {device_id:?}");

let hub = &self.hub;

if let Ok(device) = hub.devices.get(device_id) {
device.lose(message);
}
}

pub fn device_get_internal_counters(&self, device_id: DeviceId) -> wgt::InternalCounters {
let hub = &self.hub;
if let Ok(device) = hub.devices.get(device_id) {
Expand Down
17 changes: 10 additions & 7 deletions wgpu-core/src/device/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ fn map_buffer(
let raw_buffer = buffer.try_raw(snatch_guard)?;
let mapping = unsafe {
raw.map_buffer(raw_buffer, offset..offset + size)
.map_err(DeviceError::from)?
.map_err(|e| buffer.device.handle_hal_error(e))?
};

if !mapping.is_coherent && kind == HostMap::Read {
Expand Down Expand Up @@ -420,13 +420,16 @@ pub enum DeviceError {
DeviceMismatch(#[from] Box<DeviceMismatch>),
}

impl From<hal::DeviceError> for DeviceError {
fn from(error: hal::DeviceError) -> Self {
impl DeviceError {
/// Only use this function in contexts where there is no `Device`.
///
/// Use [`Device::handle_hal_error`] otherwise.
pub fn from_hal(error: hal::DeviceError) -> Self {
match error {
hal::DeviceError::Lost => DeviceError::Lost,
hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory,
hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed,
hal::DeviceError::Unexpected => DeviceError::Lost,
hal::DeviceError::Lost => Self::Lost,
hal::DeviceError::OutOfMemory => Self::OutOfMemory,
hal::DeviceError::ResourceCreationFailed => Self::ResourceCreationFailed,
hal::DeviceError::Unexpected => Self::Lost,
}
}
}
Expand Down
Loading

0 comments on commit eb47449

Please sign in to comment.