Skip to content

Commit e131a78

Browse files
authored
TDX: Pull TLB flush ringbuf struct into support/, add tests (#1387)
The added tests use [loom](https://docs.rs/loom/0.7.2/loom/) to ensure that all possible orders of operations work correctly. This required tweaking the orderings we use. Can be reviewed commit-by-commit.
1 parent 40aff86 commit e131a78

File tree

9 files changed

+331
-99
lines changed

9 files changed

+331
-99
lines changed

.config/nextest.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ fail-fast = false
2828
path = "junit.xml"
2929
store-success-output = "true"
3030

31+
[[profile.ci.overrides]]
32+
# allow loom-based tests more time, as they take a while
33+
filter = 'test(loom)'
34+
slow-timeout = { period = "30s", terminate-after = 2 }
35+
3136
[[profile.ci.overrides]]
3237
# use fuzzy-matching for the package() to allow out-of-tree tests to use the
3338
# same profile

Cargo.lock

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,16 @@ version = "1.1.2"
197197
source = "registry+https://github.com/rust-lang/crates.io-index"
198198
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
199199

200+
[[package]]
201+
name = "atomic_ringbuf"
202+
version = "0.0.0"
203+
dependencies = [
204+
"cfg-if",
205+
"inspect",
206+
"loom",
207+
"parking_lot",
208+
]
209+
200210
[[package]]
201211
name = "autocfg"
202212
version = "1.4.0"
@@ -7963,6 +7973,7 @@ dependencies = [
79637973
"aarch64defs",
79647974
"aarch64emu",
79657975
"anyhow",
7976+
"atomic_ringbuf",
79667977
"bitfield-struct 0.10.1",
79677978
"bitvec",
79687979
"build_rs_guest_arch",

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ pipette_protocol = { path = "petri/pipette_protocol" }
9090
# support crates
9191
address_filter = { path = "support/address_filter" }
9292
arc_cyclic_builder = { path = "support/arc_cyclic_builder" }
93+
atomic_ringbuf = { path = "support/atomic_ringbuf" }
9394
cache_topology = { path = "support/cache_topology" }
9495
closeable_mutex = { path = "support/closeable_mutex" }
9596
ci_logger = { path = "support/ci_logger" }
@@ -463,6 +464,7 @@ libfuzzer-sys = "0.4"
463464
libtest-mimic = "0.8"
464465
linkme = "0.3.9"
465466
log = "0.4"
467+
loom = "0.7.2"
466468
macaddr = "1.0"
467469
mbrman = "0.5"
468470
mimalloc = { version = "0.1.39", default-features = false }
@@ -596,6 +598,10 @@ lto = 'fat'
596598
[profile.dev.package.minimal_rt]
597599
opt-level = 1
598600

601+
# Optimize loom even in the dev profile, as otherwise tests just take too long.
602+
[profile.dev.package.loom]
603+
opt-level = 3
604+
599605
[patch.crates-io]
600606
# Pending <https://github.com/ferrilab/bitvec/pull/273>
601607
bitvec = { git = "https://github.com/smalis-msft/bitvec", branch = "set-aliased-previous-val" }

openhcl/virt_mshv_vtl/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ vmcore.workspace = true
3030
x86defs.workspace = true
3131
x86emu.workspace = true
3232

33+
atomic_ringbuf.workspace = true
3334
inspect_counters.workspace = true
3435
inspect = { workspace = true, features = ["std"] }
3536
mesh.workspace = true

openhcl/virt_mshv_vtl/src/processor/tdx/tlb_flush.rs

Lines changed: 8 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,15 @@
55
66
use crate::TdxBacked;
77
use crate::UhProcessor;
8+
use atomic_ringbuf::AtomicRingBuffer;
89
use hcl::GuestVtl;
910
use hcl::ioctl::ProcessorRunner;
1011
use hcl::ioctl::tdx::Tdx;
1112
use hvdef::hypercall::HvGvaRange;
1213
use inspect::Inspect;
13-
use parking_lot::Mutex;
14-
use parking_lot::MutexGuard;
1514
use safeatomic::AtomicSliceOps;
1615
use std::num::Wrapping;
1716
use std::sync::atomic::AtomicU32;
18-
use std::sync::atomic::AtomicU64;
19-
use std::sync::atomic::AtomicUsize;
2017
use std::sync::atomic::Ordering;
2118
use x86defs::tdx::TdGlaVmAndFlags;
2219
use x86defs::tdx::TdxGlaListInfo;
@@ -29,7 +26,7 @@ pub(super) const FLUSH_GVA_LIST_SIZE: usize = 32;
2926
#[derive(Debug, Inspect)]
3027
pub(super) struct TdxPartitionFlushState {
3128
/// A fixed-size ring buffer of GVAs that need to be flushed.
32-
pub(super) gva_list: AtomicTlbRingBuffer,
29+
pub(super) gva_list: AtomicRingBuffer<FLUSH_GVA_LIST_SIZE, HvGvaRange>,
3330
/// The number of times an entire TLB flush has been requested.
3431
pub(super) flush_entire_counter: AtomicU32,
3532
/// The number of times a non-global TLB flush has been requested.
@@ -40,7 +37,7 @@ pub(super) struct TdxPartitionFlushState {
4037
impl TdxPartitionFlushState {
4138
pub(super) fn new() -> Self {
4239
Self {
43-
gva_list: AtomicTlbRingBuffer::new(),
40+
gva_list: AtomicRingBuffer::new(),
4441
flush_entire_counter: AtomicU32::new(0),
4542
flush_entire_non_global_counter: AtomicU32::new(0),
4643
}
@@ -112,7 +109,7 @@ impl UhProcessor<'_, TdxBacked> {
112109
if flush_entire_required {
113110
self_flush_state.flush_entire_counter = Wrapping(partition_flush_entire);
114111
self_flush_state.flush_entire_non_global_counter = Wrapping(partition_flush_non_global);
115-
self_flush_state.gva_list_count = Wrapping(partition_flush_state.gva_list.count());
112+
self_flush_state.gva_list_count = partition_flush_state.gva_list.count();
116113
Self::set_flush_entire(
117114
true,
118115
&mut self.backing.vtls[target_vtl].private_regs.vp_entry_flags,
@@ -139,7 +136,7 @@ impl UhProcessor<'_, TdxBacked> {
139136
flush_page: &user_driver::memory::MemoryBlock,
140137
) -> bool {
141138
// Check quickly to see whether any new addresses are in the list.
142-
let partition_list_count = Wrapping(partition_flush_state.gva_list.count());
139+
let partition_list_count = partition_flush_state.gva_list.count();
143140
if partition_list_count == *gva_list_count {
144141
return true;
145142
}
@@ -151,10 +148,10 @@ impl UhProcessor<'_, TdxBacked> {
151148
}
152149

153150
// The last `count_diff` addresses are the new ones, copy them locally.
154-
let flush_addrs = &mut [0; FLUSH_GVA_LIST_SIZE][..count_diff];
151+
let flush_addrs = &mut [HvGvaRange(0); FLUSH_GVA_LIST_SIZE][..count_diff];
155152
if !partition_flush_state
156153
.gva_list
157-
.try_copy(*gva_list_count, flush_addrs)
154+
.try_copy(gva_list_count.0, flush_addrs)
158155
{
159156
return false;
160157
}
@@ -165,7 +162,7 @@ impl UhProcessor<'_, TdxBacked> {
165162

166163
if count_diff == 1 {
167164
runner
168-
.invgla(gla_flags, TdxGlaListInfo::from(flush_addrs[0]))
165+
.invgla(gla_flags, TdxGlaListInfo::from(flush_addrs[0].0))
169166
.unwrap();
170167
} else {
171168
gla_flags.set_list(true);
@@ -198,91 +195,3 @@ impl UhProcessor<'_, TdxBacked> {
198195
}
199196
}
200197
}
201-
202-
#[derive(Debug, Inspect)]
203-
pub(super) struct AtomicTlbRingBuffer {
204-
/// The contents of the buffer.
205-
#[inspect(hex, with = "|x| inspect::iter_by_index(x.iter())")]
206-
buffer: Box<[AtomicU64; FLUSH_GVA_LIST_SIZE]>,
207-
/// The number of GVAs that have been added over the lifetime of the VM.
208-
gva_list_count: AtomicUsize,
209-
/// The number of GVAs that have started being added to the list over the
210-
/// lifetime of the VM.
211-
in_progress_count: AtomicUsize,
212-
/// A guard to ensure that only one thread is writing to the list at a time.
213-
write_lock: Mutex<()>,
214-
}
215-
216-
pub(super) struct AtomicTlbRingBufferWriteGuard<'a> {
217-
buf: &'a AtomicTlbRingBuffer,
218-
_write_lock: MutexGuard<'a, ()>,
219-
}
220-
221-
impl AtomicTlbRingBuffer {
222-
fn new() -> Self {
223-
Self {
224-
buffer: Box::new(std::array::from_fn(|_| AtomicU64::new(0))),
225-
gva_list_count: AtomicUsize::new(0),
226-
in_progress_count: AtomicUsize::new(0),
227-
write_lock: Mutex::new(()),
228-
}
229-
}
230-
231-
fn count(&self) -> usize {
232-
self.gva_list_count.load(Ordering::Acquire)
233-
}
234-
235-
pub fn write(&self) -> AtomicTlbRingBufferWriteGuard<'_> {
236-
let write_lock = self.write_lock.lock();
237-
AtomicTlbRingBufferWriteGuard {
238-
buf: self,
239-
_write_lock: write_lock,
240-
}
241-
}
242-
243-
fn try_copy(&self, start_count: Wrapping<usize>, flush_addrs: &mut [u64]) -> bool {
244-
let mut index = start_count;
245-
for flush_addr in flush_addrs.iter_mut() {
246-
*flush_addr = self.buffer[index.0 % FLUSH_GVA_LIST_SIZE].load(Ordering::Relaxed);
247-
index += 1;
248-
}
249-
std::sync::atomic::fence(Ordering::Acquire);
250-
251-
// Check to see whether any additional entries have been added
252-
// that would have caused a wraparound. If so, the local list is
253-
// incomplete and the copy has failed.
254-
if (Wrapping(self.in_progress_count.load(Ordering::Acquire)) - start_count).0
255-
> FLUSH_GVA_LIST_SIZE
256-
{
257-
return false;
258-
}
259-
true
260-
}
261-
}
262-
263-
impl AtomicTlbRingBufferWriteGuard<'_> {
264-
pub fn extend(&self, items: impl ExactSizeIterator<Item = HvGvaRange>) {
265-
debug_assert_eq!(
266-
self.buf.in_progress_count.load(Ordering::Relaxed),
267-
self.buf.gva_list_count.load(Ordering::Relaxed)
268-
);
269-
// Adding a new item to the buffer must be done in three steps:
270-
// 1. Indicate that an entry is about to be added so that any flush
271-
// code executing simultaneously will know that it might lose an
272-
// entry that it is expecting to see.
273-
// 2. Add the entry.
274-
// 3. Increment the valid entry count so that any flush code executing
275-
// simultaneously will know it is valid.
276-
let len = items.len();
277-
let start_count = self.buf.in_progress_count.load(Ordering::Relaxed);
278-
let end_count = start_count.wrapping_add(len);
279-
self.buf
280-
.in_progress_count
281-
.store(end_count, Ordering::Relaxed);
282-
for (i, v) in items.enumerate() {
283-
self.buf.buffer[(start_count.wrapping_add(i)) % FLUSH_GVA_LIST_SIZE]
284-
.store(v.0, Ordering::Release);
285-
}
286-
self.buf.gva_list_count.store(end_count, Ordering::Release);
287-
}
288-
}

support/atomic_ringbuf/Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
[package]
5+
name = "atomic_ringbuf"
6+
rust-version.workspace = true
7+
edition.workspace = true
8+
9+
[dependencies]
10+
cfg-if.workspace = true
11+
inspect.workspace = true
12+
parking_lot.workspace = true
13+
14+
[target.'cfg(target_arch = "x86_64")'.dev-dependencies]
15+
loom.workspace = true
16+
17+
[lints]
18+
workspace = true

0 commit comments

Comments
 (0)