Skip to content

Commit f5d3ef2

Browse files
author
Danilo Krummrich
committed
rust: devres: get rid of Devres' inner Arc
So far Devres uses an inner memory allocation and reference count, i.e. an inner Arc, in order to ensure that the devres callback can't run into a use-after-free in case where the Devres object is dropped while the devres callback runs concurrently. Instead, use a completion in order to avoid a potential UAF: In Devres::drop(), if we detect that we can't remove the devres action anymore, we wait for the completion that is completed from the devres callback. If, in turn, we were able to successfully remove the devres action, we can just go ahead. This, again, allows us to get rid of the internal Arc, and instead let Devres consume an `impl PinInit<T, E>` in order to return an `impl PinInit<Devres<T>, E>`, which enables us to get away with less memory allocations. Additionally, having the resulting explicit synchronization in Devres::drop() prevents potential subtle undesired side effects of the devres callback dropping the final Arc reference asynchronously within the devres callback. Reviewed-by: Benno Lossin <lossin@kernel.org> Reviewed-by: Boqun Feng <boqun.feng@gmail.com> Link: https://lore.kernel.org/r/20250626200054.243480-4-dakr@kernel.org [ Move '# Invariants' below '# Examples'. - Danilo ] Signed-off-by: Danilo Krummrich <dakr@kernel.org>
1 parent 46ae8fd commit f5d3ef2

File tree

5 files changed

+154
-111
lines changed

5 files changed

+154
-111
lines changed

drivers/gpu/nova-core/driver.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22

3-
use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*};
3+
use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*, sync::Arc};
44

55
use crate::gpu::Gpu;
66

@@ -34,7 +34,10 @@ impl pci::Driver for NovaCore {
3434
pdev.enable_device_mem()?;
3535
pdev.set_master();
3636

37-
let bar = pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0"))?;
37+
let bar = Arc::pin_init(
38+
pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")),
39+
GFP_KERNEL,
40+
)?;
3841

3942
let this = KBox::pin_init(
4043
try_pin_init!(Self {

drivers/gpu/nova-core/gpu.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22

3-
use kernel::{device, devres::Devres, error::code::*, pci, prelude::*};
3+
use kernel::{device, devres::Devres, error::code::*, pci, prelude::*, sync::Arc};
44

55
use crate::driver::Bar0;
66
use crate::firmware::{Firmware, FIRMWARE_VERSION};
@@ -161,14 +161,14 @@ impl Spec {
161161
pub(crate) struct Gpu {
162162
spec: Spec,
163163
/// MMIO mapping of PCI BAR 0
164-
bar: Devres<Bar0>,
164+
bar: Arc<Devres<Bar0>>,
165165
fw: Firmware,
166166
}
167167

168168
impl Gpu {
169169
pub(crate) fn new(
170170
pdev: &pci::Device<device::Bound>,
171-
devres_bar: Devres<Bar0>,
171+
devres_bar: Arc<Devres<Bar0>>,
172172
) -> Result<impl PinInit<Self>> {
173173
let bar = devres_bar.access(pdev.as_ref())?;
174174
let spec = Spec::new(bar)?;

rust/kernel/devres.rs

Lines changed: 126 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,21 @@ use crate::{
1313
ffi::c_void,
1414
prelude::*,
1515
revocable::{Revocable, RevocableGuard},
16-
sync::{rcu, Arc, Completion},
17-
types::{ARef, ForeignOwnable},
16+
sync::{rcu, Completion},
17+
types::{ARef, ForeignOwnable, Opaque, ScopeGuard},
1818
};
1919

20+
use pin_init::Wrapper;
21+
22+
/// [`Devres`] inner data accessed from [`Devres::callback`].
2023
#[pin_data]
21-
struct DevresInner<T: Send> {
22-
dev: ARef<Device>,
23-
callback: unsafe extern "C" fn(*mut c_void),
24+
struct Inner<T: Send> {
2425
#[pin]
2526
data: Revocable<T>,
27+
/// Tracks whether [`Devres::callback`] has been completed.
28+
#[pin]
29+
devm: Completion,
30+
/// Tracks whether revoking [`Self::data`] has been completed.
2631
#[pin]
2732
revoke: Completion,
2833
}
@@ -88,100 +93,115 @@ struct DevresInner<T: Send> {
8893
/// # fn no_run(dev: &Device<Bound>) -> Result<(), Error> {
8994
/// // SAFETY: Invalid usage for example purposes.
9095
/// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? };
91-
/// let devres = Devres::new(dev, iomem, GFP_KERNEL)?;
96+
/// let devres = KBox::pin_init(Devres::new(dev, iomem), GFP_KERNEL)?;
9297
///
9398
/// let res = devres.try_access().ok_or(ENXIO)?;
9499
/// res.write8(0x42, 0x0);
95100
/// # Ok(())
96101
/// # }
97102
/// ```
98-
pub struct Devres<T: Send>(Arc<DevresInner<T>>);
103+
///
104+
/// # Invariants
105+
///
106+
/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only.
107+
#[pin_data(PinnedDrop)]
108+
pub struct Devres<T: Send> {
109+
dev: ARef<Device>,
110+
/// Pointer to [`Self::devres_callback`].
111+
///
112+
/// Has to be stored, since Rust does not guarantee to always return the same address for a
113+
/// function. However, the C API uses the address as a key.
114+
callback: unsafe extern "C" fn(*mut c_void),
115+
/// Contains all the fields shared with [`Self::callback`].
116+
// TODO: Replace with `UnsafePinned`, once available.
117+
//
118+
// Subsequently, the `drop_in_place()` in `Devres::drop` and the explicit `Send` and `Sync'
119+
// impls can be removed.
120+
#[pin]
121+
inner: Opaque<Inner<T>>,
122+
}
123+
124+
impl<T: Send> Devres<T> {
125+
/// Creates a new [`Devres`] instance of the given `data`.
126+
///
127+
/// The `data` encapsulated within the returned `Devres` instance' `data` will be
128+
/// (revoked)[`Revocable`] once the device is detached.
129+
pub fn new<'a, E>(
130+
dev: &'a Device<Bound>,
131+
data: impl PinInit<T, E> + 'a,
132+
) -> impl PinInit<Self, Error> + 'a
133+
where
134+
T: 'a,
135+
Error: From<E>,
136+
{
137+
let callback = Self::devres_callback;
99138

100-
impl<T: Send> DevresInner<T> {
101-
fn new(dev: &Device<Bound>, data: T, flags: Flags) -> Result<Arc<DevresInner<T>>> {
102-
let inner = Arc::pin_init(
103-
try_pin_init!( DevresInner {
104-
dev: dev.into(),
105-
callback: Self::devres_callback,
139+
try_pin_init!(&this in Self {
140+
// INVARIANT: `inner` is properly initialized.
141+
inner <- Opaque::pin_init(try_pin_init!(Inner {
106142
data <- Revocable::new(data),
143+
devm <- Completion::new(),
107144
revoke <- Completion::new(),
108-
}),
109-
flags,
110-
)?;
111-
112-
// Convert `Arc<DevresInner>` into a raw pointer and make devres own this reference until
113-
// `Self::devres_callback` is called.
114-
let data = inner.clone().into_raw();
145+
})),
146+
callback,
147+
dev: {
148+
// SAFETY: `this` is a valid pointer to uninitialized memory.
149+
let inner = unsafe { &raw mut (*this.as_ptr()).inner };
115150

116-
// SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is
117-
// detached.
118-
let ret =
119-
unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) };
120-
121-
if ret != 0 {
122-
// SAFETY: We just created another reference to `inner` in order to pass it to
123-
// `bindings::devm_add_action`. If `bindings::devm_add_action` fails, we have to drop
124-
// this reference accordingly.
125-
let _ = unsafe { Arc::from_raw(data) };
126-
return Err(Error::from_errno(ret));
127-
}
151+
// SAFETY:
152+
// - `dev.as_raw()` is a pointer to a valid bound device.
153+
// - `inner` is guaranteed to be a valid for the duration of the lifetime of `Self`.
154+
// - `devm_add_action()` is guaranteed not to call `callback` until `this` has been
155+
// properly initialized, because we require `dev` (i.e. the *bound* device) to
156+
// live at least as long as the returned `impl PinInit<Self, Error>`.
157+
to_result(unsafe {
158+
bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast())
159+
})?;
128160

129-
Ok(inner)
161+
dev.into()
162+
},
163+
})
130164
}
131165

132-
fn as_ptr(&self) -> *const Self {
133-
self as _
166+
fn inner(&self) -> &Inner<T> {
167+
// SAFETY: By the type invairants of `Self`, `inner` is properly initialized and always
168+
// accessed read-only.
169+
unsafe { &*self.inner.get() }
134170
}
135171

136-
fn remove_action(this: &Arc<Self>) -> bool {
137-
// SAFETY:
138-
// - `self.inner.dev` is a valid `Device`,
139-
// - the `action` and `data` pointers are the exact same ones as given to devm_add_action()
140-
// previously,
141-
// - `self` is always valid, even if the action has been released already.
142-
let success = unsafe {
143-
bindings::devm_remove_action_nowarn(
144-
this.dev.as_raw(),
145-
Some(this.callback),
146-
this.as_ptr() as _,
147-
)
148-
} == 0;
149-
150-
if success {
151-
// SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if
152-
// devm_remove_action_nowarn() was successful we can (and have to) claim back ownership
153-
// of this reference.
154-
let _ = unsafe { Arc::from_raw(this.as_ptr()) };
155-
}
156-
157-
success
172+
fn data(&self) -> &Revocable<T> {
173+
&self.inner().data
158174
}
159175

160176
#[allow(clippy::missing_safety_doc)]
161177
unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) {
162-
let ptr = ptr as *mut DevresInner<T>;
163-
// Devres owned this memory; now that we received the callback, drop the `Arc` and hence the
164-
// reference.
165-
// SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in
166-
// `DevresInner::new`.
167-
let inner = unsafe { Arc::from_raw(ptr) };
178+
// SAFETY: In `Self::new` we've passed a valid pointer to `Inner` to `devm_add_action()`,
179+
// hence `ptr` must be a valid pointer to `Inner`.
180+
let inner = unsafe { &*ptr.cast::<Inner<T>>() };
181+
182+
// Ensure that `inner` can't be used anymore after we signal completion of this callback.
183+
let inner = ScopeGuard::new_with_data(inner, |inner| inner.devm.complete_all());
168184

169185
if !inner.data.revoke() {
170186
// If `revoke()` returns false, it means that `Devres::drop` already started revoking
171-
// `inner.data` for us. Hence we have to wait until `Devres::drop()` signals that it
172-
// completed revoking `inner.data`.
187+
// `data` for us. Hence we have to wait until `Devres::drop` signals that it
188+
// completed revoking `data`.
173189
inner.revoke.wait_for_completion();
174190
}
175191
}
176-
}
177192

178-
impl<T: Send> Devres<T> {
179-
/// Creates a new [`Devres`] instance of the given `data`. The `data` encapsulated within the
180-
/// returned `Devres` instance' `data` will be revoked once the device is detached.
181-
pub fn new(dev: &Device<Bound>, data: T, flags: Flags) -> Result<Self> {
182-
let inner = DevresInner::new(dev, data, flags)?;
183-
184-
Ok(Devres(inner))
193+
fn remove_action(&self) -> bool {
194+
// SAFETY:
195+
// - `self.dev` is a valid `Device`,
196+
// - the `action` and `data` pointers are the exact same ones as given to
197+
// `devm_add_action()` previously,
198+
(unsafe {
199+
bindings::devm_remove_action_nowarn(
200+
self.dev.as_raw(),
201+
Some(self.callback),
202+
core::ptr::from_ref(self.inner()).cast_mut().cast(),
203+
)
204+
} == 0)
185205
}
186206

187207
/// Obtain `&'a T`, bypassing the [`Revocable`].
@@ -213,44 +233,63 @@ impl<T: Send> Devres<T> {
213233
/// }
214234
/// ```
215235
pub fn access<'a>(&'a self, dev: &'a Device<Bound>) -> Result<&'a T> {
216-
if self.0.dev.as_raw() != dev.as_raw() {
236+
if self.dev.as_raw() != dev.as_raw() {
217237
return Err(EINVAL);
218238
}
219239

220240
// SAFETY: `dev` being the same device as the device this `Devres` has been created for
221-
// proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as
222-
// long as `dev` lives; `dev` lives at least as long as `self`.
223-
Ok(unsafe { self.0.data.access() })
241+
// proves that `self.data` hasn't been revoked and is guaranteed to not be revoked as long
242+
// as `dev` lives; `dev` lives at least as long as `self`.
243+
Ok(unsafe { self.data().access() })
224244
}
225245

226246
/// [`Devres`] accessor for [`Revocable::try_access`].
227247
pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> {
228-
self.0.data.try_access()
248+
self.data().try_access()
229249
}
230250

231251
/// [`Devres`] accessor for [`Revocable::try_access_with`].
232252
pub fn try_access_with<R, F: FnOnce(&T) -> R>(&self, f: F) -> Option<R> {
233-
self.0.data.try_access_with(f)
253+
self.data().try_access_with(f)
234254
}
235255

236256
/// [`Devres`] accessor for [`Revocable::try_access_with_guard`].
237257
pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> {
238-
self.0.data.try_access_with_guard(guard)
258+
self.data().try_access_with_guard(guard)
239259
}
240260
}
241261

242-
impl<T: Send> Drop for Devres<T> {
243-
fn drop(&mut self) {
262+
// SAFETY: `Devres` can be send to any task, if `T: Send`.
263+
unsafe impl<T: Send> Send for Devres<T> {}
264+
265+
// SAFETY: `Devres` can be shared with any task, if `T: Sync`.
266+
unsafe impl<T: Send + Sync> Sync for Devres<T> {}
267+
268+
#[pinned_drop]
269+
impl<T: Send> PinnedDrop for Devres<T> {
270+
fn drop(self: Pin<&mut Self>) {
244271
// SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data
245272
// anymore, hence it is safe not to wait for the grace period to finish.
246-
if unsafe { self.0.data.revoke_nosync() } {
247-
// We revoked `self.0.data` before the devres action did, hence try to remove it.
248-
if !DevresInner::remove_action(&self.0) {
273+
if unsafe { self.data().revoke_nosync() } {
274+
// We revoked `self.data` before the devres action did, hence try to remove it.
275+
if !self.remove_action() {
249276
// We could not remove the devres action, which means that it now runs concurrently,
250-
// hence signal that `self.0.data` has been revoked successfully.
251-
self.0.revoke.complete_all();
277+
// hence signal that `self.data` has been revoked by us successfully.
278+
self.inner().revoke.complete_all();
279+
280+
// Wait for `Self::devres_callback` to be done using this object.
281+
self.inner().devm.wait_for_completion();
252282
}
283+
} else {
284+
// `Self::devres_callback` revokes `self.data` for us, hence wait for it to be done
285+
// using this object.
286+
self.inner().devm.wait_for_completion();
253287
}
288+
289+
// INVARIANT: At this point it is guaranteed that `inner` can't be accessed any more.
290+
//
291+
// SAFETY: `inner` is valid for dropping.
292+
unsafe { core::ptr::drop_in_place(self.inner.get()) };
254293
}
255294
}
256295

rust/kernel/pci.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
//! C header: [`include/linux/pci.h`](srctree/include/linux/pci.h)
66
77
use crate::{
8-
alloc::flags::*,
98
bindings, container_of, device,
109
device_id::RawDeviceId,
1110
devres::Devres,
@@ -398,19 +397,20 @@ impl Device {
398397
impl Device<device::Bound> {
399398
/// Mapps an entire PCI-BAR after performing a region-request on it. I/O operation bound checks
400399
/// can be performed on compile time for offsets (plus the requested type size) < SIZE.
401-
pub fn iomap_region_sized<const SIZE: usize>(
402-
&self,
400+
pub fn iomap_region_sized<'a, const SIZE: usize>(
401+
&'a self,
403402
bar: u32,
404-
name: &CStr,
405-
) -> Result<Devres<Bar<SIZE>>> {
406-
let bar = Bar::<SIZE>::new(self, bar, name)?;
407-
let devres = Devres::new(self.as_ref(), bar, GFP_KERNEL)?;
408-
409-
Ok(devres)
403+
name: &'a CStr,
404+
) -> impl PinInit<Devres<Bar<SIZE>>, Error> + 'a {
405+
Devres::new(self.as_ref(), Bar::<SIZE>::new(self, bar, name))
410406
}
411407

412408
/// Mapps an entire PCI-BAR after performing a region-request on it.
413-
pub fn iomap_region(&self, bar: u32, name: &CStr) -> Result<Devres<Bar>> {
409+
pub fn iomap_region<'a>(
410+
&'a self,
411+
bar: u32,
412+
name: &'a CStr,
413+
) -> impl PinInit<Devres<Bar>, Error> + 'a {
414414
self.iomap_region_sized::<0>(bar, name)
415415
}
416416
}

0 commit comments

Comments
 (0)