From a88ce6f961c51d37c731f0ffd6d66745e5894ef5 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sun, 24 Nov 2024 11:18:43 -0800 Subject: [PATCH] vfio msi cap Signed-off-by: Changyuan Lyu --- alioth/src/vfio/pci.rs | 150 +++++++++++++++++++++++++---------------- 1 file changed, 91 insertions(+), 59 deletions(-) diff --git a/alioth/src/vfio/pci.rs b/alioth/src/vfio/pci.rs index b34cb0e..7848ec7 100644 --- a/alioth/src/vfio/pci.rs +++ b/alioth/src/vfio/pci.rs @@ -14,6 +14,7 @@ use std::cmp::min; use std::fs::File; +use std::iter::zip; use std::mem::size_of; use std::ops::Range; use std::os::fd::{AsFd, AsRawFd}; @@ -34,7 +35,7 @@ use crate::mem::mapped::ArcMemPages; use crate::mem::{IoRegion, MemRange, MemRegion, MemRegionEntry, MemRegionType}; use crate::pci::cap::{ MsiCapHdr, MsiMsgCtrl, MsixCap, MsixCapMmio, MsixTableEntry, MsixTableMmio, MsixTableMmioEntry, - PciCapHdr, PciCapId, + NullCap, PciCapHdr, PciCapId, }; use crate::pci::config::{ Command, CommonHeader, ConfigHeader, DeviceHeader, EmulatedHeader, HeaderData, HeaderType, @@ -350,12 +351,14 @@ where { pub fn new(name: Arc, dev: D, msi_sender: M) -> Result> { let cdev = Arc::new(VfioDev { dev, name }); + cdev.dev.reset()?; + let msi_sender = Arc::new(msi_sender); let region_config = cdev.dev.get_region_info(VfioPciRegion::CONFIG.raw())?; log::info!("{}: config region: {region_config:#x?}", cdev.name); - let pci_command = Command::MEM | Command::BUS_MASTER | Command::INTX_DISABLE; + let pci_command = Command::IO | Command::MEM | Command::BUS_MASTER | Command::INTX_DISABLE; cdev.dev.write( region_config.offset + CommonHeader::OFFSET_COMMAND as u64, CommonHeader::SIZE_COMMAND as u8, @@ -375,8 +378,9 @@ where dev_header.intx_pin = 0; dev_header.common.command = Command::empty(); - let mut msix_cap = None; let mut masked_caps: Vec<(u64, Box)> = vec![]; + let mut msix_info = None; + let mut msi_info = None; if dev_header.common.status.contains(Status::CAP) { let mut cap_offset = dev_header.capability_pointer as usize; @@ -396,13 +400,16 @@ where }; c.control.set_enabled(false); c.control.set_masked(false); - masked_caps.push(( - cap_offset as u64, - Box::new(MsixCapMmio { - cap: RwLock::new(c.clone()), - }), - )); - msix_cap = Some(c); + msix_info = Some((cap_offset, c.clone())); + // for testing + // msix_info = None; + // masked_caps.push(( + // cap_offset as u64, + // Box::new(NullCap { + // size: size_of_val(&c) as u8, + // next: c.header.next, + // }), + // )); } else if cap_header.id == PciCapId::Msi as u8 { let Ok((mut c, _)) = MsiCapHdr::read_from_prefix(cap_buf) else { log::error!( @@ -416,21 +423,59 @@ where c.control.set_ext_msg_data_cap(true); let multi_msg_cap = min(5, c.control.multi_msg_cap()); c.control.set_multi_msg_cap(multi_msg_cap); - let irqfds = (0..(1 << multi_msg_cap)) - .map(|_| msi_sender.create_irqfd()) - .collect::, _>>()?; - let msi_cap = MsiCapMmio:: { - cap: RwLock::new((c, MsiCapBody { data: [0; 4] })), - dev: cdev.clone(), - irqfds, - }; - log::info!("{}: created MSI cap: {msi_cap:#x?}", cdev.name); - masked_caps.push((cap_offset as u64, Box::new(msi_cap))); + msi_info = Some((cap_offset, c)); } cap_offset = cap_header.next as usize; } } + let mut msix_cap = None; + if let Some((offset, cap)) = msix_info { + msix_cap = Some(cap.clone()); + let msix_cap_mmio = MsixCapMmio { + cap: RwLock::new(cap), + }; + masked_caps.push((offset as u64, Box::new(msix_cap_mmio))); + if let Some((offset, hdr)) = msi_info { + let null_cap = NullCap { + size: hdr.control.cap_size(), + next: hdr.header.next, + }; + masked_caps.push((offset as u64, Box::new(null_cap))); + } + } else if let Some((offset, hdr)) = msi_info { + let count = 1 << hdr.control.multi_msg_cap(); + let irqfds = (0..count) + .map(|_| msi_sender.create_irqfd()) + .collect::, _>>()?; + + let mut eventfds = [-1; 32]; + for (fd, irqfd) in zip(&mut eventfds, &irqfds) { + *fd = irqfd.as_fd().as_raw_fd(); + } + let set_eventfd = VfioIrqSet { + argsz: (size_of::>() + size_of::() * count) as u32, + flags: VfioIrqSetFlag::DATA_EVENTFD | VfioIrqSetFlag::ACTION_TRIGGER, + index: VfioPciIrq::MSI.raw(), + start: 0, + count: count as u32, + data: VfioIrqSetData { eventfds }, + }; + log::info!( + "{}: update msi eventfds to {:?}", + cdev.name, + &eventfds[0..count] + ); + cdev.dev.set_irqs(&set_eventfd)?; + + let msi_cap_mmio = MsiCapMmio:: { + cap: RwLock::new((hdr, MsiCapBody { data: [0; 4] })), + dev: cdev.clone(), + irqfds, + }; + masked_caps.push((offset as u64, Box::new(msi_cap_mmio))); + } + let mut extra_areas: MmioBus> = MmioBus::new(); masked_caps.sort_by_key(|(offset, _)| *offset); let mut area_end = 0x40; @@ -462,7 +507,6 @@ where let config_header = ConfigHeader::Device(dev_header); - cdev.dev.reset()?; let msix_info = cdev.dev.get_irq_info(VfioPciIrq::MSIX.raw())?; let msix_entries = RwLock::new( (0..msix_info.count) @@ -541,15 +585,15 @@ where } fn reset(&self) -> Result<()> { - let disable_msix = VfioIrqSet { - argsz: size_of::>() as u32, - flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, - index: VfioPciIrq::MSIX.raw(), - start: 0, - count: 0, - data: VfioIrqSetData { eventfds: [] }, - }; - self.config.dev.dev.set_irqs(&disable_msix)?; + // let disable_msix = VfioIrqSet { + // argsz: size_of::>() as u32, + // flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, + // index: VfioPciIrq::MSIX.raw(), + // start: 0, + // count: 0, + // data: VfioIrqSetData { eventfds: [] }, + // }; + // self.config.dev.dev.set_irqs(&disable_msix)?; self.msix_table.reset(); self.config.dev.dev.reset() @@ -704,10 +748,6 @@ where D: Device, { fn update_msi(&self, ctrl: MsiMsgCtrl, data: &[u32; 4]) -> Result<()> { - let r = self.dev.dev.disable_all_irqs(VfioPciIrq::MSI); - if !ctrl.enable() { - return r; - } let msg_mask = if ctrl.ext_msg_data() { u32::MAX } else { @@ -724,33 +764,18 @@ where (_, false) => 0, }; let count = 1 << ctrl.multi_msg(); - let mut eventfds = [-1; 32]; - for i in 0..count { - let irqfd = &self.irqfds[i]; - let msg = msg | i as u32; + for (index, irqfd) in self.irqfds.iter().enumerate() { irqfd.set_masked(true)?; + if !ctrl.enable() || index >= count || mask & (1 << index) > 0 { + continue; + } + let msg = msg | index as u32; irqfd.set_addr_hi((addr >> 32) as u32)?; irqfd.set_addr_lo(addr as u32)?; irqfd.set_data(msg)?; - if mask & (1 << i) == 0 { - irqfd.set_masked(false)?; - } - eventfds[i] = irqfd.as_fd().as_raw_fd(); + irqfd.set_masked(false)?; } - let set_eventfd = VfioIrqSet { - argsz: (size_of::>() + size_of::() * count) as u32, - flags: VfioIrqSetFlag::DATA_EVENTFD | VfioIrqSetFlag::ACTION_TRIGGER, - index: VfioPciIrq::MSI.raw(), - start: 0, - count: count as u32, - data: VfioIrqSetData { eventfds }, - }; - log::info!( - "{}: update msi eventfds to {:?}", - self.dev.name, - &eventfds[0..count] - ); - self.dev.dev.set_irqs(&set_eventfd) + Ok(()) } } @@ -774,6 +799,11 @@ where } } fn write(&self, offset: u64, size: u8, val: u64) -> mem::Result { + log::info!( + "{}: write 0x{val:0width$x} to offset 0x{offset:x}.", + self.dev.name, + width = 2 * size as usize + ); let (hdr, body) = &mut *self.cap.write(); let mut need_update = false; match (offset as usize, size) { @@ -785,11 +815,11 @@ where ctrl.set_multi_msg(multi_msg); } need_update = ctrl.enable() != new_ctrl.enable() - || (ctrl.enable() && ctrl.ext_msg_data() != new_ctrl.ext_msg_data()); + || (new_ctrl.enable() && ctrl.ext_msg_data() != new_ctrl.ext_msg_data()); ctrl.set_ext_msg_data(new_ctrl.ext_msg_data()); ctrl.set_enable(new_ctrl.enable()); } - (0x4 | 0x8 | 0xc | 0x10, 4) => { + (0x4 | 0x8 | 0xc | 0x10, 2 | 4) => { let data_offset = (offset as usize - size_of_val(hdr)) >> 2; let reg = &mut body.data[data_offset]; need_update = hdr.control.enable() && *reg != val as u32; @@ -815,5 +845,7 @@ where D: Device, M: MsiSender, { - fn reset(&self) {} + fn reset(&self) { + // TODO + } }