Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f8eabd1
first pass
gurasinghMS Jan 16, 2026
8ee51ca
no longer using strong ref counting
gurasinghMS Jan 16, 2026
edee1bc
Added a first pass on the strong/weak pointer approach
gurasinghMS Jan 16, 2026
1d6fe56
weak pointer strong pointer approach
gurasinghMS Jan 16, 2026
7175d6c
Now implemented a thin wrapper around Arc<Namespace> so as to make re…
gurasinghMS Jan 20, 2026
5259989
v1 implementation with the is strong and is weak stuff
gurasinghMS Jan 20, 2026
ddeb8dd
More minor cleanup
gurasinghMS Jan 20, 2026
2fb91b0
Uncommenting prep steps
gurasinghMS Jan 20, 2026
db97283
Update vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
gurasinghMS Jan 20, 2026
f4229c2
Update vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
gurasinghMS Jan 20, 2026
0af769f
Update vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
gurasinghMS Jan 21, 2026
127b0db
uncomment test for disk removal to verify changes in CI
gurasinghMS Jan 21, 2026
b18dfce
Remove unused import
gurasinghMS Jan 21, 2026
ed094d7
Fixing clippy related issues
gurasinghMS Jan 21, 2026
22f9912
More clippy issues
gurasinghMS Jan 21, 2026
6172ca3
Another clippy fix
gurasinghMS Jan 21, 2026
a6059c5
Now using the deref trait instead of rewriting all the functions
gurasinghMS Jan 21, 2026
48f59ce
remove changes to prep steps
gurasinghMS Jan 21, 2026
593a118
Minor comment changes
gurasinghMS Jan 21, 2026
ec20c27
Merge branch 'main' into handle-controller-namespaces-leaving
gurasinghMS Jan 21, 2026
8ae9b20
doc build fixes
gurasinghMS Jan 21, 2026
7647119
Forgot to save some changes
gurasinghMS Jan 21, 2026
b357f1c
Added more debugging lines to verify namespace recreation
gurasinghMS Jan 22, 2026
a46466e
Merge branch 'main' into handle-controller-namespaces-leaving
gurasinghMS Jan 22, 2026
5cdb667
Fixed debug output issue
gurasinghMS Jan 22, 2026
98b4064
Merge branch 'main' into handle-controller-namespaces-leaving
gurasinghMS Jan 22, 2026
2e70d85
Increasing the retries for finding device along with longer sleep
gurasinghMS Jan 22, 2026
1ff6ed4
Added a log line with debugging in storvsp as wellg
gurasinghMS Jan 22, 2026
2f0af48
Increase retries to find a disk and reduce total number of iterations
gurasinghMS Jan 22, 2026
7be1e20
Merge branch 'main' into handle-controller-namespaces-leaving
mattkur Jan 25, 2026
9e52f8c
Merge branch 'main' into handle-controller-namespaces-leaving
alandau Jan 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions openhcl/underhill_core/src/nvme_manager/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl NvmeDevice for VfioNvmeDevice {
async fn namespace(
&mut self,
nsid: u32,
) -> Result<Arc<nvme_driver::Namespace>, nvme_driver::NamespaceError> {
) -> Result<nvme_driver::NamespaceHandle, nvme_driver::NamespaceError> {
self.driver.namespace(nsid).await
}

Expand Down Expand Up @@ -324,7 +324,7 @@ enum NvmeDriverRequest {
Inspect(Deferred),
LoadDriver(Rpc<Span, anyhow::Result<()>>),
/// Get an instance of the supplied namespace (an nvme `nsid`).
GetNamespace(Rpc<(Span, u32), Result<Arc<nvme_driver::Namespace>, NamespaceError>>),
GetNamespace(Rpc<(Span, u32), Result<nvme_driver::NamespaceHandle, NamespaceError>>),
Save(Rpc<Span, anyhow::Result<NvmeDriverSavedState>>),
/// Shutdown the NVMe driver, and the manager of that driver.
/// Takes the span, and a set of options.
Expand Down Expand Up @@ -430,7 +430,7 @@ pub struct NvmeDriverManagerClient {
}

impl NvmeDriverManagerClient {
pub async fn get_namespace(&self, nsid: u32) -> anyhow::Result<Arc<nvme_driver::Namespace>> {
pub async fn get_namespace(&self, nsid: u32) -> anyhow::Result<nvme_driver::NamespaceHandle> {
let span = tracing::info_span!(
"nvme_device_manager_get_namespace",
pci_id = self.pci_id,
Expand Down
9 changes: 4 additions & 5 deletions openhcl/underhill_core/src/nvme_manager/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ impl NvmeManager {
enum Request {
Inspect(inspect::Deferred),
ForceLoadDriver(inspect::DeferredUpdate),
GetNamespace(Rpc<(String, u32), anyhow::Result<Arc<nvme_driver::Namespace>>>),
GetNamespace(Rpc<(String, u32), anyhow::Result<nvme_driver::NamespaceHandle>>),
Save(Rpc<(), anyhow::Result<NvmeManagerSavedState>>),
Shutdown {
span: tracing::Span,
Expand All @@ -177,7 +177,7 @@ impl NvmeManagerClient {
&self,
pci_id: String,
nsid: u32,
) -> anyhow::Result<Arc<nvme_driver::Namespace>> {
) -> anyhow::Result<nvme_driver::NamespaceHandle> {
self.sender
.call(Request::GetNamespace, (pci_id.clone(), nsid))
.instrument(tracing::info_span!(
Expand Down Expand Up @@ -382,7 +382,7 @@ impl NvmeManagerWorker {
pci_id: String,
nsid: u32,
context: NvmeWorkerContext,
) -> anyhow::Result<Arc<nvme_driver::Namespace>> {
) -> anyhow::Result<nvme_driver::NamespaceHandle> {
// If the driver is already created, use it.
let mut client: Option<NvmeDriverManagerClient> = None;
{
Expand Down Expand Up @@ -557,7 +557,6 @@ mod tests {
use futures::future::join_all;
use inspect::Inspect;
use inspect::InspectionBuilder;
use nvme_driver::Namespace;
use nvme_driver::save_restore::NvmeDriverSavedState;
use pal_async::DefaultDriver;
use pal_async::async_test;
Expand Down Expand Up @@ -641,7 +640,7 @@ mod tests {
async fn namespace(
&mut self,
_nsid: u32,
) -> Result<Arc<Namespace>, nvme_driver::NamespaceError> {
) -> Result<nvme_driver::NamespaceHandle, nvme_driver::NamespaceError> {
// Record start time for concurrency analysis
{
let mut start_times = self.namespace_start_times.write();
Expand Down
3 changes: 1 addition & 2 deletions openhcl/underhill_core/src/nvme_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@

use async_trait::async_trait;
use inspect::Inspect;
use std::sync::Arc;
use thiserror::Error;
use vmcore::vm_task::VmTaskDriverSource;

Expand Down Expand Up @@ -95,7 +94,7 @@ pub trait NvmeDevice: Inspect + Send + Sync {
async fn namespace(
&mut self,
nsid: u32,
) -> Result<Arc<nvme_driver::Namespace>, nvme_driver::NamespaceError>;
) -> Result<nvme_driver::NamespaceHandle, nvme_driver::NamespaceError>;
async fn save(&mut self) -> anyhow::Result<nvme_driver::save_restore::NvmeDriverSavedState>;
async fn shutdown(mut self: Box<Self>);
fn update_servicing_flags(&mut self, keep_alive: bool);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,22 @@ use guestmem::GuestMemory;
use guid::Guid;
use nvme::NvmeController;
use nvme::NvmeControllerCaps;
use nvme_driver::Namespace;
use nvme_driver::NamespaceHandle;
use nvme_driver::NvmeDriver;
use nvme_spec::nvm::DsmRange;
use page_pool_alloc::PagePoolAllocator;
use pal_async::DefaultDriver;
use pci_core::msi::MsiInterruptSet;
use scsi_buffers::OwnedRequestBuffers;
use std::convert::TryFrom;
use std::sync::Arc;
use user_driver_emulated_mock::DeviceTestMemory;
use vmcore::vm_task::SingleDriverBackend;
use vmcore::vm_task::VmTaskDriverSource;

/// Nvme driver fuzzer
pub struct FuzzNvmeDriver {
driver: Option<NvmeDriver<FuzzEmulatedDevice<NvmeController, PagePoolAllocator>>>,
namespace: Arc<Namespace>,
namespace: NamespaceHandle,
payload_mem: GuestMemory,
cpu_count: u32,
}
Expand Down
115 changes: 72 additions & 43 deletions vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

use super::spec;
use crate::NVME_PAGE_SHIFT;
use crate::Namespace;
use crate::NamespaceError;
use crate::NamespaceHandle;
use crate::RequestError;
use crate::driver::save_restore::IoQueueSavedState;
use crate::namespace::Namespace;
use crate::queue_pair::AdminAerHandler;
use crate::queue_pair::Issuer;
use crate::queue_pair::MAX_CQ_ENTRIES;
Expand All @@ -35,6 +36,7 @@ use std::mem::ManuallyDrop;
use std::ops::Deref;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::Weak;
use task_control::AsyncRun;
use task_control::InspectTask;
use task_control::TaskControl;
Expand Down Expand Up @@ -76,15 +78,40 @@ pub struct NvmeDriver<D: DeviceBacking> {
rescan_notifiers: Arc<RwLock<HashMap<u32, mesh::Sender<()>>>>,
/// NVMe namespaces associated with this driver. Mapping nsid to NamespaceHandle.
#[inspect(skip)]
namespaces: HashMap<u32, NamespaceHandle>,
namespaces: HashMap<u32, WeakOrStrong<Namespace>>,
/// Keeps the controller connected (CC.EN==1) while servicing.
nvme_keepalive: bool,
bounce_buffer: bool,
}

struct NamespaceHandle {
namespace: Arc<Namespace>,
in_use: bool,
/// A container that can hold either a weak or strong reference to a value.
///
/// During normal operation, the driver ONLY stores weak references. After restore
/// strong references are temporarily held until the StorageController retrieves them.
/// Once retrieved, the strong reference is downgraded to a weak one, resuming
/// normal behavior.
enum WeakOrStrong<T> {
Weak(Weak<T>),
Strong(Arc<T>),
}

impl<T> WeakOrStrong<T> {
/// Returns a strong reference to the underlying value when possible.
/// Implicitly downgrades Strong to Weak when this function is invoked.
pub fn get_arc(&mut self) -> Option<Arc<T>> {
match self {
WeakOrStrong::Strong(arc) => {
let strong = arc.clone();
*self = WeakOrStrong::Weak(Arc::downgrade(arc));
Some(strong)
}
WeakOrStrong::Weak(weak) => weak.upgrade(),
}
}

pub fn is_weak(&self) -> bool {
matches!(self, WeakOrStrong::Weak(_))
}
}

#[derive(Inspect)]
Expand Down Expand Up @@ -576,19 +603,23 @@ impl<D: DeviceBacking> NvmeDriver<D> {
}

/// Gets the namespace with namespace ID `nsid`.
pub async fn namespace(&mut self, nsid: u32) -> Result<Arc<Namespace>, NamespaceError> {
if let Some(handle) = self.namespaces.get_mut(&nsid) {
// After reboot ns will be present but unused.
if !handle.in_use {
handle.in_use = true;
return Ok(handle.namespace.clone());
}
pub async fn namespace(&mut self, nsid: u32) -> Result<NamespaceHandle, NamespaceError> {
if let Some(namespace) = self.namespaces.get_mut(&nsid) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there is a more idiomatic way to do this ..... it looks a little off to me.

// After restore we will have a strong ref -> downgrade and return.
// If we have a weak ref, make sure it is not upgradeable (that means we have a duplicate somewhere).
let is_weak = namespace.is_weak(); // This value will change after invoking get_arc().
let namespace = namespace.get_arc();
if let Some(namespace) = namespace {
if is_weak && namespace.check_active().is_ok() {
return Err(NamespaceError::Duplicate(nsid));
}

// Prevent multiple references to the same Namespace.
// Allowing this could lead to undefined behavior if multiple components
// concurrently read or write to the same namespace. To avoid this,
// return an error if the namespace is already requested.
return Err(NamespaceError::DuplicateRequest { nsid });
tracing::debug!(
"reusing existing namespace nsid={}. This should only happen after restore.",
nsid
);
return Ok(NamespaceHandle::new(namespace));
}
}

let (send, recv) = mesh::channel::<()>();
Expand All @@ -603,18 +634,13 @@ impl<D: DeviceBacking> NvmeDriver<D> {
)
.await?,
);
self.namespaces.insert(
nsid,
NamespaceHandle {
namespace: namespace.clone(),
in_use: true,
},
);
self.namespaces
.insert(nsid, WeakOrStrong::Weak(Arc::downgrade(&namespace)));

// Append the sender to the list of notifiers for this nsid.
let mut notifiers = self.rescan_notifiers.write();
notifiers.insert(nsid, send);
Ok(namespace)
Ok(NamespaceHandle::new(namespace))
}

/// Returns the number of CPUs that are in fallback mode (that are using a
Expand Down Expand Up @@ -655,13 +681,19 @@ impl<D: DeviceBacking> NvmeDriver<D> {
"saving namespaces",
);
let mut saved_namespaces = vec![];
for (nsid, handle) in self.namespaces.iter() {
saved_namespaces.push(handle.namespace.save().with_context(|| {
format!(
"failed to save namespace nsid {} device {}",
nsid, self.device_id
)
})?);
for (nsid, namespace) in self.namespaces.iter_mut() {
let is_weak = namespace.is_weak(); // This value will change after invoking get_arc().
if let Some(ns) = namespace.get_arc()
&& ns.check_active().is_ok()
&& is_weak
{
saved_namespaces.push(ns.save().with_context(|| {
format!(
"failed to save namespace nsid {} device {}",
nsid, self.device_id
)
})?);
}
}
Ok(NvmeDriverSavedState {
identify_ctrl: spec::IdentifyController::read_from_bytes(
Expand Down Expand Up @@ -951,17 +983,14 @@ impl<D: DeviceBacking> NvmeDriver<D> {
let (send, recv) = mesh::channel::<()>();
this.namespaces.insert(
ns.nsid,
NamespaceHandle {
namespace: Arc::new(Namespace::restore(
&driver,
admin.issuer().clone(),
recv,
this.identify.clone().unwrap(),
&this.io_issuers,
ns,
)?),
in_use: false,
},
WeakOrStrong::Strong(Arc::new(Namespace::restore(
&driver,
admin.issuer().clone(),
recv,
this.identify.clone().unwrap(),
&this.io_issuers,
ns,
)?)),
);
this.rescan_notifiers.write().insert(ns.nsid, send);
}
Expand Down
2 changes: 1 addition & 1 deletion vm/devices/storage/disk_nvme/nvme_driver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ mod tests;

pub use self::driver::NvmeDriver;
pub use self::driver::save_restore;
pub use self::namespace::Namespace;
pub use self::namespace::NamespaceError;
pub use self::namespace::NamespaceHandle;
pub use self::queue_pair::RequestError;

use nvme_spec as spec;
Expand Down
42 changes: 37 additions & 5 deletions vm/devices/storage/disk_nvme/nvme_driver/src/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use guestmem::ranges::PagedRange;
use inspect::Inspect;
use pal_async::task::Spawn;
use parking_lot::Mutex;
use std::ops::Deref;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU64;
Expand All @@ -41,8 +42,32 @@ pub enum NamespaceError {
Request(#[source] RequestError),
#[error("maximum data transfer size too small: 2^{0} pages")]
MdtsInvalid(u8),
#[error("namespace ID {nsid} already exists")]
DuplicateRequest { nsid: u32 },
#[error("requesting a duplicate namespace: {0}")]
Duplicate(u32),
}

/// A thin Namespace wrapper to revoke cloning permissions on `Arc<Namespace>`.
/// This type allows the nvme_driver to force system-wide single-ownership
/// semantics for `Namespace` objects.
/// Because the end-user can no longer call namespace.clone(), `weak.upgrade()` can
/// safely be used to determine when a Namespace is no longer in use by the disk.
#[derive(Debug, Inspect)]
pub struct NamespaceHandle {
namespace: Arc<Namespace>,
}

impl NamespaceHandle {
/// Creates a new handle
pub fn new(namespace: Arc<Namespace>) -> Self {
Self { namespace }
}
}

impl Deref for NamespaceHandle {
type Target = Namespace;
fn deref(&self) -> &Self::Target {
&self.namespace
}
}

/// An NVMe namespace.
Expand Down Expand Up @@ -82,6 +107,13 @@ impl Namespace {
.await
.map_err(NamespaceError::Request)?;

tracing::debug!(
"created namespace from identify nsid={}, nsze={}, nsguid={:?}",
nsid,
identify.nsze,
identify.nguid
);

Namespace::new_from_identify(
driver,
admin,
Expand Down Expand Up @@ -197,7 +229,7 @@ impl Namespace {
1 << self.block_shift
}

fn check_active(&self) -> Result<(), RequestError> {
pub fn check_active(&self) -> Result<(), RequestError> {
if self.state.removed.load(Ordering::Relaxed) {
// The namespace has been removed. Return invalid namespace even if
// the namespace has returned to avoid accidentally accessing the
Expand Down Expand Up @@ -561,7 +593,7 @@ impl DynamicState {
mut rescan_event: mesh::Receiver<()>,
) {
loop {
tracing::debug!("rescan");
tracing::debug!("rescan task started nsid={}", nsid);

// This relies on a mesh channel so notifications will NOT be missed
// even if the task was not started when the first AEN was processed.
Expand All @@ -570,7 +602,7 @@ impl DynamicState {
// Once the sender is dropped, no more repoll signals can be received so
// there is no point in continuing.
if event.is_none() {
tracing::debug!("rescan task exiting");
tracing::debug!("rescan task exiting nsid={}", nsid);
break;
}

Expand Down
Loading