Skip to content

Commit

Permalink
Bug fix #1096 - Prevent mass detach based on source PCI address
Browse files Browse the repository at this point in the history
Before this commit, Kimchi was mass detaching PCI devices when
trying to detach a single dev 'hostdev0' following this criteria:

- using the source address element, see all hostdevs that share
the same PCI address (domain x bus x slot) as 'hostdev0';

- if more than one is found, 'hostdev0' is considered to be
a multifunction device an a mass detachment of all devices found
is made.

This logic is flawed when considering SR-IOV devices (CX4 cards
for example). Although all virtual functions share the same
source PCI address, each VF should be attached and detached
individually.

Following the current libvirt documentation
(https://libvirt.org/formatdomain.html), there is an attribute
called 'multifunction' that can be used:

'Also available is the multifunction attribute, which controls
turning on the multifunction bit for a particular slot/function
in the PCI control register (...) multifunction defaults to 'off',
but should be set to 'on' for function 0 of a slot that will have
multiple functions used.'

Further experimentation showed that, in multifunction devices,
the device PCI address differs only in the 'function' attribute,
while in the VFs for example they are different. This means that
the device address is a better place to look for paired devices
instead of using the source PCI address.

This patch implements these changes to fix #1096: use the multifunction
attribute to determine if a hostdev is multifuction and changed
the 'get_devices_same_addr' method to use the device address instead
of the source PCI address as comparison.

This patch also makes changes on the work done in bug fix #1073.
The re-attach of the recently detached devices is not necessary
when the device was set to 'managed' mode. In fact, re-attaching
VFs set with managed='yes' was causing libvirt instability. This patch
adds this verification and avoids the re-attach in these cases.

Minor simplications of vmhostdevs.py code were also made, such
as use a dict instead of an array of tuples to easily retrieve
elements by a key and moved the NotFound verification up in
the '_detach_device' method.

Unit tests included.

Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com>
  • Loading branch information
danielhb authored and alinefm committed Jan 30, 2017
1 parent e24dfb5 commit 08571fe
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 49 deletions.
134 changes: 85 additions & 49 deletions model/vmhostdevs.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,14 +635,24 @@ def _event_devices(self, conn, dom, alias, opaque):
wok_log.error('opaque must be valid')
return

wok_log.info("Device %s removed successfuly" % alias)
# Re-attach device to host
try:
dev = conn.get().nodeDeviceLookupByName(alias)
dev.reAttach()
except libvirt.libvirtError, e:
wok_log.error("Unable to attach device %s back to host. Error: %s",
alias, e.message)
wok_log.info("Device %s removed successfully" % alias)

# Re-attach device to host if it's not managed mode
if not opaque._managed:
try:
dev = conn.get().nodeDeviceLookupByName(alias)
dev.reAttach()
except libvirt.libvirtError, e:
wok_log.error(
"Unable to attach device %s back to host. Error: %s",
alias, e.message
)
else:
wok_log.info(
"Device %s was attached in 'managed' mode. "
"Skipping re-attach()." % alias
)

opaque._cb('OK', True)

def _detach_device(self, cb, params):
Expand All @@ -655,18 +665,27 @@ def _detach_device(self, cb, params):
lock = params['lock']

with lock:
pci_devs = [(DeviceModel.deduce_dev_name(e, self.conn), e)
for e in hostdev if e.attrib['type'] == 'pci']
pci_devs = {DeviceModel.deduce_dev_name(e, self.conn): e
for e in hostdev if e.attrib['type'] == 'pci'}

dev_info = self.dev_model.lookup(dev_name)
is_3D_device = self.dev_model.is_device_3D_controller(dev_info)
if is_3D_device and DOM_STATE_MAP[dom.info()[0]] != "shutoff":
raise InvalidOperation('KCHVMHDEV0006E',
{'name': dev_info['name']})

if not pci_devs.get(dev_name):
raise NotFoundError('KCHVMHDEV0001E',
{'vmid': vmid, 'dev_name': dev_name})

dev_name_elem = pci_devs[dev_name]
self._managed = dev_name_elem.get('managed', 'no') == 'yes'

# check for multifunction and detach all functions together
try:
multi = self._unplug_multifunction_pci(dom, hostdev, dev_name)
multi = self.unplug_multifunction_pci(
dom, hostdev, dev_name_elem
)
except libvirt.libvirtError:
multi = False

Expand All @@ -680,54 +699,71 @@ def _detach_device(self, cb, params):
cb('OK', True)
return

# detach each function individually
for e in hostdev:
if DeviceModel.deduce_dev_name(e, self.conn) == dev_name:
xmlstr = etree.tostring(e)
dom.detachDeviceFlags(
xmlstr, get_vm_config_flag(dom, mode='all'))
if e.attrib['type'] == 'pci':
self._delete_affected_pci_devices(dom, dev_name,
pci_devs)
if is_3D_device:
devsmodel = VMHostDevsModel(conn=self.conn)
devsmodel.update_mmio_guest(vmid, False)
break
else:
msg = WokMessage('KCHVMHDEV0001E',
{'vmid': vmid, 'dev_name': dev_name})
cb(msg.get_text(), False)
raise NotFoundError('KCHVMHDEV0001E',
{'vmid': vmid, 'dev_name': dev_name})
# detach individually
xmlstr = etree.tostring(dev_name_elem)
dom.detachDeviceFlags(
xmlstr, get_vm_config_flag(dom, mode='all'))
if dev_name_elem.attrib['type'] == 'pci':
self._delete_affected_pci_devices(dom, dev_name,
pci_devs)
if is_3D_device:
devsmodel = VMHostDevsModel(conn=self.conn)
devsmodel.update_mmio_guest(vmid, False)

if DOM_STATE_MAP[dom.info()[0]] == "shutoff":
cb('OK', True)

def _get_devices_same_addr(self, hostdev, domain, bus, slot):
def get_devices_same_addr(self, hostdevs, device_elem):
def elem_has_valid_address(elem):
if elem.get('type') != 'pci' or \
elem.address is None or \
elem.address.get('domain') is None or \
elem.address.get('bus') is None or \
elem.address.get('slot') is None:
return False
return True

if not elem_has_valid_address(device_elem):
return []

devices = []
for device in hostdev:
if device.attrib['type'] != 'pci':
continue
device_domain = device_elem.address.get('domain')
device_bus = device_elem.address.get('bus')
device_slot = device_elem.address.get('slot')

address = device.source.address
if int(address.attrib['domain'], 16) != domain or \
int(address.attrib['bus'], 16) != bus or \
int(address.attrib['slot'], 16) != slot:
for dev in hostdevs:

if not elem_has_valid_address(dev):
continue

devices.append(etree.tostring(device))
dev_domain = dev.address.get('domain')
dev_bus = dev.address.get('bus')
dev_slot = dev.address.get('slot')

if dev_domain == device_domain and dev_bus == device_bus and \
dev_slot == device_slot:
devices.append(etree.tostring(dev))

return devices

def _unplug_multifunction_pci(self, dom, hostdev, dev_name):
# get all devices attached to the guest in the same domain+bus+slot
# that the one we are going to detach because they must be detached
# together
domain, bus, slot, _ = dev_name.split('_')[1:]
devices = self._get_devices_same_addr(hostdev,
int(domain, 16),
int(bus, 16),
int(slot, 16))
def is_hostdev_multifunction(self, dev_elem):
if dev_elem.address is None or \
dev_elem.address.get('multifunction') is None or \
dev_elem.address.get('function') is None:

return False

is_multi = dev_elem.address.get('multifunction') == 'on' and \
dev_elem.address.get('function') == '0x0'

return is_multi

def unplug_multifunction_pci(self, dom, hostdevs, dev_elem):
if not self.is_hostdev_multifunction(dev_elem):
return False

devices = self.get_devices_same_addr(hostdevs, dev_elem)

if len(devices) <= 1:
return False

Expand All @@ -747,7 +783,7 @@ def _delete_affected_pci_devices(self, dom, dev_name, pci_devs):
DevicesModel(
conn=self.conn).get_list(_passthrough_affected_by=dev_name))

for pci_name, e in pci_devs:
for pci_name, e in pci_devs.iteritems():
if pci_name in affected_names:
xmlstr = etree.tostring(e)
dom.detachDeviceFlags(
Expand Down
164 changes: 164 additions & 0 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import time
import unittest

from lxml import objectify

import tests.utils as utils

import wok.objectstore
Expand Down Expand Up @@ -1783,6 +1785,168 @@ def test_pci_hotplug_requires_usb_controller(self):
self.assertTrue(
inst.vmhostdevs_have_usb_controller('kimchi-vm1'))

def get_hostdevs_xml(self):
return """\
<domain type='kvm' id='N'>
<name>vm_name</name>
<devices>
<emulator>/usr/bin/qemu-kvm</emulator>
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x1'/>
</source>
<alias name='hostdev0'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x05' \
function='0x1'/>
</hostdev>
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x0'/>
</source>
<alias name='hostdev1'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x05' \
function='0x0' multifunction='on'/>
</hostdev>
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x2'/>
</source>
<alias name='hostdev2'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x06' \
function='0x0'/>
</hostdev>
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x4'/>
</source>
<alias name='hostdev3'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x07' \
function='0x0'/>
</hostdev>
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x5'/>
</source>
<alias name='hostdev4'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x08' \
function='0x0'/>
</hostdev>
</devices>
</domain>
"""

def get_hostdev_multifunction_xml(self):
return """\
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x0'/>
</source>
<alias name='hostdev1'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0' \
multifunction='on'/>
</hostdev>
"""

def get_hostdev_nomultifunction_xml(self):
return """\
<hostdev mode='subsystem' type='pci' managed='yes'>
<driver name='vfio'/>
<source>
<address domain='0x0001' bus='0x0d' slot='0x00' function='0x5'/>
</source>
<alias name='hostdev4'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x08' function='0x0'/>
</hostdev>
"""

def test_vmhostdev_is_hostdev_multifunction(self):
inst = model.Model(None, objstore_loc=self.tmp_store)

hostdev_multi_elem = objectify.fromstring(
self.get_hostdev_multifunction_xml()
)
self.assertTrue(
inst.vmhostdev_is_hostdev_multifunction(hostdev_multi_elem)
)

hostdev_nomulti_elem = objectify.fromstring(
self.get_hostdev_nomultifunction_xml()
)
self.assertFalse(
inst.vmhostdev_is_hostdev_multifunction(hostdev_nomulti_elem)
)

def test_vmhostdev_get_devices_same_addr(self):
inst = model.Model(None, objstore_loc=self.tmp_store)

root = objectify.fromstring(self.get_hostdevs_xml())
hostdevs = root.devices.hostdev

hostdev_multi_elem = objectify.fromstring(
self.get_hostdev_multifunction_xml()
)

hostdev_same_addr_str = """\
<hostdev mode="subsystem" type="pci" managed="yes"><driver name="vfio"/>\
<source><address domain="0x0001" bus="0x0d" slot="0x00" function="0x1"/>\
</source><alias name="hostdev0"/>\
<address type="pci" domain="0x0000" bus="0x00" slot="0x05" function="0x1"/>\
</hostdev>"""
same_addr_devices = [
ET.tostring(hostdev_multi_elem), hostdev_same_addr_str
]

self.assertItemsEqual(
same_addr_devices,
inst.vmhostdev_get_devices_same_addr(hostdevs, hostdev_multi_elem)
)

nomatch_elem = objectify.fromstring(
self.get_hostdev_nomultifunction_xml()
)

self.assertEqual(
inst.vmhostdev_get_devices_same_addr(hostdevs, nomatch_elem),
[ET.tostring(nomatch_elem)]
)

@mock.patch('wok.plugins.kimchi.model.vmhostdevs.get_vm_config_flag')
def test_vmhostdev_unplug_multifunction_pci(self, mock_conf_flag):
class FakeDom():
def detachDeviceFlags(self, xml, config_flag):
pass

mock_conf_flag.return_value = ''

inst = model.Model(None, objstore_loc=self.tmp_store)

root = objectify.fromstring(self.get_hostdevs_xml())
hostdevs = root.devices.hostdev

hostdev_multi_elem = objectify.fromstring(
self.get_hostdev_multifunction_xml()
)

self.assertTrue(
inst.vmhostdev_unplug_multifunction_pci(FakeDom(), hostdevs,
hostdev_multi_elem)
)

nomatch_elem = objectify.fromstring(
self.get_hostdev_nomultifunction_xml()
)

self.assertFalse(
inst.vmhostdev_unplug_multifunction_pci(FakeDom(), hostdevs,
nomatch_elem)
)


class BaseModelTests(unittest.TestCase):
class FoosModel(object):
Expand Down

0 comments on commit 08571fe

Please sign in to comment.