Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 63 additions & 4 deletions vm/devices/user_driver/src/vfio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use uevent::UeventListener;
use vfio_bindings::bindings::vfio::VFIO_PCI_CONFIG_REGION_INDEX;
use vfio_sys::IommuType;
use vfio_sys::IrqInfo;
use vfio_sys::VfioErrata;
use vmcore::vm_task::VmTaskDriver;
use vmcore::vm_task::VmTaskDriverSource;
use zerocopy::FromBytes;
Expand Down Expand Up @@ -123,22 +124,26 @@ impl VfioDevice {
tracing::info!(pci_id, keepalive, "device arrived");
vfio_sys::print_relevant_params();

let workaround = VfioDelayWorkaroundConfig::new(driver_source.clone(), None);
let container = vfio_sys::Container::new()?;
let group_id = vfio_sys::Group::find_group_for_device(&path)?;
let group = vfio_sys::Group::open_noiommu(group_id)?;
let group = workaround
.retry_unconditional("open_noiommu", async |_| {
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The retry_unconditional method retries on any error, which is overly broad for the stated purpose of working around a specific kernel race condition (ENODEV). This could mask real errors and retry operations that should fail fast. Consider using retry_on_enodev instead, or document why unconditional retry is necessary for this specific operation.

Suggested change
.retry_unconditional("open_noiommu", async |_| {
.retry_on_enodev("open_noiommu", async |_| {

Copilot uses AI. Check for mistakes.
vfio_sys::Group::open_noiommu(group_id)
})
.await?;
group.set_container(&container)?;
if !group.status()?.viable() {
anyhow::bail!("group is not viable");
}

let driver = driver_source.simple();
container.set_iommu(IommuType::NoIommu)?;
if keepalive {
// Prevent physical hardware interaction when restoring.
group.set_keep_alive(pci_id, &driver).await?;
group.set_keep_alive(pci_id).await?;
}
tracing::debug!(pci_id, "about to open device");
let device = group.open_device(pci_id, &driver).await?;
let device = group.open_device(pci_id).await?;
Comment on lines +143 to +146
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The retry logic has been removed but open_device and set_keep_alive are not being retried in the calling code. According to the PR title "Retry all VFIO operations", these operations should be wrapped with retry logic. Currently, only open_noiommu is being retried (line 130-134), but open_device (line 146) and set_keep_alive (line 143) are called without retry wrappers, even though they originally had ENODEV retry logic that was removed from vfio_sys.

Copilot uses AI. Check for mistakes.
let msix_info = device.irq_info(vfio_bindings::bindings::vfio::VFIO_PCI_MSIX_IRQ_INDEX)?;
if msix_info.flags.noresize() {
anyhow::bail!("unsupported: kernel does not support dynamic msix allocation");
Expand Down Expand Up @@ -230,6 +235,60 @@ impl VfioDevice {
}
}

struct VfioDelayWorkaroundConfig {
driver: VmTaskDriverSource,
enodev_sleep_duration: Duration,
}

impl VfioDelayWorkaroundConfig {
const DEFAULT_ENODEV_SLEEP_DURATION: Duration = Duration::from_millis(250);
pub fn new(driver: VmTaskDriverSource, enodev_sleep_duration: Option<Duration>) -> Self {
Self {
driver,
enodev_sleep_duration: enodev_sleep_duration
.unwrap_or(Self::DEFAULT_ENODEV_SLEEP_DURATION),
}
}

pub async fn sleep(&self) {
pal_async::timer::PolledTimer::new(&self.driver.simple())
.sleep(self.enodev_sleep_duration)
.await;
}

// There is a small race window in the 6.1 kernel between when the
// vfio device is visible to userspace, and when it is added to its
// internal list. Try one more time on ENODEV failure after a brief
// sleep.
pub async fn retry_on_enodev<F, T>(&self, op_name: &str, mut f: F) -> anyhow::Result<T>
where
F: AsyncFnMut(bool) -> anyhow::Result<T>,
{
match f(false).await {
Err(e) if VfioErrata::from_error(&e) == VfioErrata::DelayWorkaround => {
tracing::warn!("vfio operation {op_name} got ENODEV, retrying after delay");
self.sleep().await;
f(true).await
}
r => r,
}
}

Comment on lines +259 to +276
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The retry_on_enodev method is defined but never used in the code. Either use it to wrap open_device and set_keep_alive calls (which previously had ENODEV retry logic), or remove it if retry_unconditional is the intended approach for all operations.

Suggested change
// There is a small race window in the 6.1 kernel between when the
// vfio device is visible to userspace, and when it is added to its
// internal list. Try one more time on ENODEV failure after a brief
// sleep.
pub async fn retry_on_enodev<F, T>(&self, op_name: &str, mut f: F) -> anyhow::Result<T>
where
F: AsyncFnMut(bool) -> anyhow::Result<T>,
{
match f(false).await {
Err(e) if VfioErrata::from_error(&e) == VfioErrata::DelayWorkaround => {
tracing::warn!("vfio operation {op_name} got ENODEV, retrying after delay");
self.sleep().await;
f(true).await
}
r => r,
}
}

Copilot uses AI. Check for mistakes.
pub async fn retry_unconditional<F, T>(&self, op_name: &str, mut f: F) -> anyhow::Result<T>
where
F: AsyncFnMut(bool) -> anyhow::Result<T>,
Comment on lines +277 to +279
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The AsyncFnMut closure accepts a boolean parameter 'is_retry' but it's not used by the calling code. In the usage at line 131-133, the closure ignores this parameter with '_'. Consider either removing this unused parameter from the trait bound, or documenting its intended purpose if it's meant for future use.

Copilot uses AI. Check for mistakes.
{
match f(false).await {
Err(e) => {
tracing::warn!("vfio operation {op_name} failed with {e}, retrying after delay");
self.sleep().await;
f(true).await
}
r => r,
}
}
}

/// A mapped region that falls back to read/write if the memory mapped access
/// fails.
///
Expand Down
75 changes: 22 additions & 53 deletions vm/devices/user_driver/vfio_sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
use anyhow::Context;
use bitfield_struct::bitfield;
use libc::c_void;
use pal_async::driver::Driver;
use pal_async::timer::PolledTimer;
use std::ffi::CString;
use std::fs;
use std::fs::File;
Expand Down Expand Up @@ -85,6 +83,21 @@ mod ioctl {
);
}

#[derive(Debug, PartialEq, Eq)]
pub enum VfioErrata {
None,
DelayWorkaround,
}

impl VfioErrata {
pub fn from_error(observed: &anyhow::Error) -> Self {
if observed.downcast_ref::<nix::errno::Errno>() == Some(&nix::errno::Errno::ENODEV) {
return VfioErrata::DelayWorkaround;
}
return VfioErrata::None;
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary return statement. In Rust, the last expression in a function is automatically returned, so this explicit return is redundant and goes against idiomatic Rust style.

Suggested change
return VfioErrata::None;
VfioErrata::None

Copilot uses AI. Check for mistakes.
Comment on lines +95 to +97
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary return statement. In Rust, the last expression in a function is automatically returned, so this explicit return is redundant and goes against idiomatic Rust style.

Suggested change
return VfioErrata::DelayWorkaround;
}
return VfioErrata::None;
VfioErrata::DelayWorkaround
} else {
VfioErrata::None
}

Copilot uses AI. Check for mistakes.
}
}

pub struct Container {
file: File,
}
Expand Down Expand Up @@ -151,30 +164,12 @@ impl Group {
Ok(group)
}

pub async fn open_device(
&self,
device_id: &str,
driver: &(impl ?Sized + Driver),
) -> anyhow::Result<Device> {
pub async fn open_device(&self, device_id: &str) -> anyhow::Result<Device> {
let id = CString::new(device_id)?;
// SAFETY: The file descriptor is valid and the string is null-terminated.
let file = unsafe {
let fd = ioctl::vfio_group_get_device_fd(self.file.as_raw_fd(), id.as_ptr());
// There is a small race window in the 6.1 kernel between when the
// vfio device is visible to userspace, and when it is added to its
// internal list. Try one more time on ENODEV failure after a brief
// sleep.
let fd = match fd {
Err(nix::errno::Errno::ENODEV) => {
tracing::warn!(pci_id = device_id, "Retrying vfio open_device after delay");
PolledTimer::new(driver)
.sleep(std::time::Duration::from_millis(250))
.await;
ioctl::vfio_group_get_device_fd(self.file.as_raw_fd(), id.as_ptr())
}
_ => fd,
};
let fd = fd.with_context(|| format!("failed to get device fd for {device_id}"))?;
let fd = ioctl::vfio_group_get_device_fd(self.file.as_raw_fd(), id.as_ptr())
.with_context(|| format!("failed to get device fd for {device_id}"))?;
File::from_raw_fd(fd)
};

Expand Down Expand Up @@ -206,39 +201,13 @@ impl Group {
/// Skip VFIO device reset when kernel is reloaded during servicing.
/// This feature is non-upstream version of our kernel and will be
/// eventually replaced with iommufd.
pub async fn set_keep_alive(
&self,
device_id: &str,
driver: &(impl ?Sized + Driver),
) -> anyhow::Result<()> {
pub async fn set_keep_alive(&self, device_id: &str) -> anyhow::Result<()> {
// SAFETY: The file descriptor is valid and a correctly constructed struct is being passed.
unsafe {
let id = CString::new(device_id)?;
let r = ioctl::vfio_group_set_keep_alive(self.file.as_raw_fd(), id.as_ptr());
match r {
Ok(_) => Ok(()),
Err(nix::errno::Errno::ENODEV) => {
// There is a small race window in the kernel between when the
// vfio device is visible to userspace, and when it is added to its
// internal list. Try one more time on ENODEV failure after a brief
// sleep.
tracing::warn!(
pci_id = device_id,
"vfio keepalive got ENODEV, retrying after delay"
);
PolledTimer::new(driver)
.sleep(std::time::Duration::from_millis(250))
.await;
ioctl::vfio_group_set_keep_alive(self.file.as_raw_fd(), id.as_ptr())
.with_context(|| {
format!("failed to set keep-alive after delay for {device_id}")
})
.map(|_| ())
}
Err(_) => r
.with_context(|| format!("failed to set keep-alive for {device_id}"))
.map(|_| ()),
}
ioctl::vfio_group_set_keep_alive(self.file.as_raw_fd(), id.as_ptr())
.with_context(|| format!("failed to set keep-alive for {device_id}"))
.map(|_| ())
}
}
}
Expand Down
Loading