Skip to content

Commit 77b606b

Browse files
authored
Merge pull request #127 from Dstack-TEE/teepod-gpu
teepod: Support for GPU device integration with CVM
2 parents f190c8c + 93c91ff commit 77b606b

File tree

15 files changed

+1180
-218
lines changed

15 files changed

+1180
-218
lines changed

Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ members = [
3535
"key-provider-client",
3636
"dstack-types",
3737
"cert-client",
38+
"lspci",
3839
]
3940
resolver = "2"
4041

@@ -60,6 +61,7 @@ load_config = { path = "load_config" }
6061
key-provider-client = { path = "key-provider-client" }
6162
dstack-types = { path = "dstack-types" }
6263
cert-client = { path = "cert-client" }
64+
lspci = { path = "lspci" }
6365

6466
# Core dependencies
6567
anyhow = "1.0.95"

lspci/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "lspci"
3+
version.workspace = true
4+
authors.workspace = true
5+
edition.workspace = true
6+
license.workspace = true
7+
8+
[dependencies]
9+
anyhow.workspace = true
10+
11+
[dev-dependencies]
12+
insta.workspace = true

lspci/src/lib.rs

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
use std::process::Command;
2+
3+
use anyhow::{Context, Result};
4+
5+
/// A PCI device as described by one section of `lspci -nn -vv` output.
///
/// All textual fields are empty strings when the section's header line could
/// not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Device {
    /// Slot address: the first token of the header line (e.g. `bc:02.0`).
    pub slot: String,
    /// Human-readable class name (e.g. `PCI bridge`).
    pub class: String,
    /// Class code taken from the `[xxxx]:` token (e.g. `0604`).
    pub class_id: String,
    /// Text between the class code and the `[vendor:product]` token, with the
    /// standalone word `Device` removed.
    pub description: String,
    /// Vendor half of the `[vendor:product]` token (e.g. `10de`).
    pub vendor_id: String,
    /// Product half of the `[vendor:product]` token (e.g. `22a3`).
    pub product_id: String,
    /// Flags parsed from the `Control:` detail line; all `false` when absent.
    pub control: Control,
    /// Value of the `NUMA node:` detail line, when present and numeric.
    pub numa_node: Option<u32>,
}

/// The subset of a PCI device's `Control:` flags that this crate tracks.
///
/// The default value has every flag cleared.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Control {
    /// `I/O+` was reported.
    pub io: bool,
    /// `Mem+` was reported.
    pub mem: bool,
    /// `BusMaster+` was reported.
    pub bus_master: bool,
}

impl Device {
    /// Returns the combined identifier in `vendor_id:product_id` form
    /// (e.g. `11f8:4128`).
    pub fn full_product_id(&self) -> String {
        format!("{}:{}", self.vendor_id, self.product_id)
    }

    /// Returns `true` when bus mastering is enabled, which is the signal this
    /// crate uses to decide that a device is in use.
    pub fn in_use(&self) -> bool {
        self.control.bus_master
    }
}
35+
36+
/// Runs `lspci` and parses the output into a vector of `Device` structs.
37+
pub fn lspci_filtered(filter: impl Fn(&Device) -> bool) -> Result<Vec<Device>> {
38+
let output = Command::new("lspci")
39+
.args(["-nn", "-vv"])
40+
.output()
41+
.context("Failed to run lspci")?;
42+
Ok(parse_lspci(
43+
&String::from_utf8(output.stdout).context("Failed to parse lspci output")?,
44+
filter,
45+
))
46+
}
47+
48+
/// Parses the `lspci` output into a vector of `Device` structs.
49+
///
50+
/// # Arguments
51+
/// * `output` - A string slice containing the `lspci` output.
52+
///
53+
/// # Returns
54+
/// A vector of `Device` structs, each representing a parsed PCI device.
55+
pub fn parse_lspci(output: &str, filter: impl Fn(&Device) -> bool) -> Vec<Device> {
56+
let lines: Vec<&str> = output.lines().collect();
57+
let mut devices = Vec::new();
58+
let mut current_device = Vec::new();
59+
60+
// Group lines into device sections
61+
for line in lines {
62+
let line = line.trim_end();
63+
if !line.is_empty() {
64+
if !line.starts_with("\t") {
65+
if !current_device.is_empty() {
66+
let device = parse_device(&current_device);
67+
if filter(&device) {
68+
devices.push(device);
69+
}
70+
current_device = Vec::new();
71+
}
72+
current_device.push(line);
73+
} else {
74+
current_device.push(line.trim_start()); // Remove leading tab
75+
}
76+
}
77+
}
78+
if !current_device.is_empty() {
79+
let device = parse_device(&current_device);
80+
if filter(&device) {
81+
devices.push(device);
82+
}
83+
}
84+
devices
85+
}
86+
87+
/// Parses a single device section into a `Device` struct.
88+
///
89+
/// # Arguments
90+
/// * `device_lines` - A slice of strings representing the lines for one device.
91+
///
92+
/// # Returns
93+
/// A `Device` struct with the parsed fields.
94+
fn parse_device(device_lines: &[&str]) -> Device {
95+
let device_line = device_lines[0];
96+
let (slot, class, class_id, description, vendor_id, product_id) =
97+
parse_device_line(device_line);
98+
99+
let mut control = Control {
100+
io: false,
101+
mem: false,
102+
bus_master: false,
103+
};
104+
let mut numa_node = None;
105+
106+
// Parse detail lines
107+
for line in device_lines.iter().skip(1) {
108+
if line.starts_with("Control: ") {
109+
control = parse_control_line(line);
110+
} else if line.starts_with("NUMA node: ") {
111+
numa_node = parse_numa_node_line(line);
112+
}
113+
}
114+
115+
Device {
116+
slot,
117+
class,
118+
class_id,
119+
description,
120+
vendor_id,
121+
product_id,
122+
control,
123+
numa_node,
124+
}
125+
}
126+
127+
/// Parses the header line of a device section.
///
/// The expected shape is
/// `<slot> <class words> [<class_id>]: <description words> [<vendor>:<product>] ...`,
/// e.g. `bd:00.0 Bridge [0680]: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:22a3] (rev a1)`.
///
/// # Arguments
/// * `line` - The header line of a device entry.
///
/// # Returns
/// `(slot, class, class_id, description, vendor_id, product_id)`. Every field
/// is an empty string when the line does not match the expected shape; the
/// function never panics on malformed input.
fn parse_device_line(line: &str) -> (String, String, String, String, String, String) {
    let words: Vec<&str> = line.split_whitespace().collect();
    parse_device_words(&words).unwrap_or_default()
}

/// Extracts the header fields from whitespace-separated tokens, or `None` when
/// the class-id or vendor/product token is missing.
fn parse_device_words(
    words: &[&str],
) -> Option<(String, String, String, String, String, String)> {
    // The class code is the first `[xxxx]:` token after the slot. Starting at
    // index 1 keeps the slot out of the search, so `words[1..class_idx]` below
    // is always a valid range.
    let (class_idx, class_id) = words.iter().enumerate().skip(1).find_map(|(idx, word)| {
        let id = word.strip_prefix('[')?.strip_suffix("]:")?;
        Some((idx, id))
    })?;

    // The IDs are the first `[vendor:product]` token after the class code.
    let after_class = &words[class_idx + 1..];
    let (id_offset, vendor_id, product_id) =
        after_class.iter().enumerate().find_map(|(idx, word)| {
            let inner = word.strip_prefix('[')?.strip_suffix(']')?;
            let (vendor, product) = inner.split_once(':')?;
            Some((idx, vendor, product))
        })?;

    // `lspci` prints the placeholder word "Device" when it has no name for the
    // product; it carries no information, so drop it from the description.
    let description = after_class[..id_offset]
        .iter()
        .copied()
        .filter(|word| *word != "Device")
        .collect::<Vec<_>>()
        .join(" ");

    Some((
        words[0].to_string(),
        words[1..class_idx].join(" "),
        class_id.to_string(),
        description,
        vendor_id.to_string(),
        product_id.to_string(),
    ))
}
183+
184+
/// Parses the control line to extract io, mem, and bus_master settings.
185+
///
186+
/// # Arguments
187+
/// * `line` - The control line (e.g., "Control: I/O- Mem- BusMaster- ...").
188+
///
189+
/// # Returns
190+
/// A `Control` struct with the parsed settings.
191+
fn parse_control_line(line: &str) -> Control {
192+
let mut control = Control {
193+
io: false,
194+
mem: false,
195+
bus_master: false,
196+
};
197+
if let Some(items) = line.strip_prefix("Control: ") {
198+
for item in items.split_whitespace() {
199+
match item {
200+
"I/O+" => control.io = true,
201+
"I/O-" => control.io = false,
202+
"Mem+" => control.mem = true,
203+
"Mem-" => control.mem = false,
204+
"BusMaster+" => control.bus_master = true,
205+
"BusMaster-" => control.bus_master = false,
206+
_ => {}
207+
}
208+
}
209+
}
210+
control
211+
}
212+
213+
/// Extracts the node number from a `NUMA node: ` line.
///
/// # Arguments
/// * `line` - The NUMA node line (e.g., "NUMA node: 1").
///
/// # Returns
/// `Some(node)` when the text after the prefix parses as a `u32`; `None` when
/// the prefix is missing or the remainder is not a number.
fn parse_numa_node_line(line: &str) -> Option<u32> {
    let node = line.strip_prefix("NUMA node: ")?;
    node.parse().ok()
}
227+
228+
// Snapshot test for `parse_lspci`: feeds a captured `lspci` sample containing
// PCI bridge and NVSwitch entries through the parser and compares the Debug
// output of the result against the stored `insta` snapshot.
#[test]
229+
fn test_lspci() {
230+
let lspci_output = r#"
231+
bc:02.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4128] (prog-if 00 [Normal decode])
232+
Subsystem: NVIDIA Corporation Device [10de:1643]
233+
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
234+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
235+
Latency: 0, Cache Line Size: 32 bytes
236+
Interrupt: pin ? routed to IRQ 92
237+
NUMA node: 1
238+
IOMMU group: 82
239+
Bus: primary=bc, secondary=bf, subordinate=bf, sec-latency=0
240+
I/O behind bridge: 0000f000-00000fff [disabled] [32-bit]
241+
Memory behind bridge: de000000-dfffffff [size=32M] [32-bit]
242+
Prefetchable memory behind bridge: 00000000fff00000-00000000000fffff [disabled] [64-bit]
243+
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
244+
BridgeCtl: Parity+ SERR+ NoISA- VGA- VGA16- MAbort- >Reset- FastB2B-
245+
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
246+
Capabilities: <access denied>
247+
Kernel driver in use: pcieport
248+
249+
bc:03.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4128] (prog-if 00 [Normal decode])
250+
Subsystem: NVIDIA Corporation Device [10de:1643]
251+
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
252+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
253+
Latency: 0, Cache Line Size: 32 bytes
254+
Interrupt: pin ? routed to IRQ 93
255+
NUMA node: 1
256+
IOMMU group: 82
257+
Bus: primary=bc, secondary=c0, subordinate=c0, sec-latency=0
258+
I/O behind bridge: 0000f000-00000fff [disabled] [32-bit]
259+
Memory behind bridge: dc000000-ddffffff [size=32M] [32-bit]
260+
Prefetchable memory behind bridge: 00000000fff00000-00000000000fffff [disabled] [64-bit]
261+
Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- <SERR- <PERR-
262+
BridgeCtl: Parity+ SERR+ NoISA- VGA- VGA16- MAbort- >Reset- FastB2B-
263+
PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
264+
Capabilities: <access denied>
265+
Kernel driver in use: pcieport
266+
267+
bd:00.0 Bridge [0680]: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:22a3] (rev a1)
268+
Subsystem: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:1796]
269+
Physical Slot: 1-1
270+
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
271+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
272+
Interrupt: pin A routed to IRQ 10
273+
NUMA node: 1
274+
IOMMU group: 82
275+
Region 0: Memory at e2000000 (64-bit, non-prefetchable) [disabled] [size=32M]
276+
Capabilities: <access denied>
277+
278+
be:00.0 Bridge [0680]: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:22a3] (rev a1)
279+
Subsystem: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:1796]
280+
Physical Slot: 2-1
281+
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
282+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
283+
Interrupt: pin A routed to IRQ 5
284+
NUMA node: 1
285+
IOMMU group: 82
286+
Region 0: Memory at e0000000 (64-bit, non-prefetchable) [disabled] [size=32M]
287+
Capabilities: <access denied>
288+
289+
bf:00.0 Bridge [0680]: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:22a3] (rev a1)
290+
Subsystem: NVIDIA Corporation GH100 [H100 NVSwitch] [10de:1796]
291+
Physical Slot: 3-1
292+
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
293+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
294+
Interrupt: pin A routed to IRQ 11
295+
NUMA node: 1
296+
IOMMU group: 82
297+
Region 0: Memory at de000000 (64-bit, non-prefetchable) [disabled] [size=32M]
298+
Capabilities: <access denied>
299+
IOMMU group: 468
300+
"#;
301+
302+
// Accept every device so the snapshot covers the complete parse result.
let devices = parse_lspci(lspci_output, |_| true);
303+
insta::assert_debug_snapshot!(devices);
304+
// The first entry's IDs come from the `[11f8:4128]` token on its header line.
assert_eq!(devices[0].full_product_id(), "11f8:4128");
305+
}

0 commit comments

Comments
 (0)