Skip to content

Commit 7b4b0e9

Browse files
Vijay Mohan Pandarathil, awilliam
Vijay Mohan Pandarathil
authored and committed
vfio: QEMU-AER: Qemu changes to support AER for VFIO-PCI devices
Add support for error containment when a VFIO device assigned to a KVM guest encounters an error. This is for PCIe devices/drivers that support AER functionality. When the host OS is notified of an error in a device either through the firmware first approach or through an interrupt handled by the AER root port driver, the error handler registered by the vfio-pci driver gets invoked. The qemu process is signaled through an eventfd registered per VFIO device by the qemu process. In the eventfd handler, qemu decides on what action to take. In this implementation, the guest is brought down to contain the error. The kernel patches for the above functionality have already been accepted. This is a refresh of the QEMU patch which was reviewed earlier. http://marc.info/?l=linux-kernel&m=136281557608087&w=2 This patch has the same contents and has been built after refreshing to latest upstream and after the linux headers have been updated in qemu. - Create an eventfd per vfio device assigned to a guest and register an event handler - This fd is passed to the vfio_pci driver through the SET_IRQ ioctl - When the device encounters an error, the eventfd is signalled and the qemu eventfd handler gets invoked. - In the handler, decide what action to take. The current action taken is to stop the guest. Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 39360f0 commit 7b4b0e9

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed

hw/misc/vfio.c

+125
Original file line number | Diff line number | Diff line change
@@ -175,13 +175,15 @@ typedef struct VFIODevice {
175175
PCIHostDeviceAddress host;
176176
QLIST_ENTRY(VFIODevice) next;
177177
struct VFIOGroup *group;
178+
EventNotifier err_notifier;
178179
uint32_t features;
179180
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
180181
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
181182
int32_t bootindex;
182183
uint8_t pm_cap;
183184
bool reset_works;
184185
bool has_vga;
186+
bool pci_aer;
185187
} VFIODevice;
186188

187189
typedef struct VFIOGroup {
@@ -2766,6 +2768,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
27662768
{
27672769
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
27682770
struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
2771+
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
27692772
int ret, i;
27702773

27712774
ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -2909,6 +2912,19 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
29092912

29102913
vdev->has_vga = true;
29112914
}
2915+
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
2916+
2917+
ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
2918+
if (ret) {
2919+
/* This can fail for an old kernel or legacy PCI dev */
2920+
DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
2921+
ret = 0;
2922+
} else if (irq_info.count == 1) {
2923+
vdev->pci_aer = true;
2924+
} else {
2925+
error_report("vfio: Warning: "
2926+
"Could not enable error recovery for the device\n");
2927+
}
29122928

29132929
error:
29142930
if (ret) {
@@ -2931,6 +2947,113 @@ static void vfio_put_device(VFIODevice *vdev)
29312947
}
29322948
}
29332949

2950+
static void vfio_err_notifier_handler(void *opaque)
2951+
{
2952+
VFIODevice *vdev = opaque;
2953+
2954+
if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
2955+
return;
2956+
}
2957+
2958+
/*
2959+
* TBD. Retrieve the error details and decide what action
2960+
* needs to be taken. One of the actions could be to pass
2961+
* the error to the guest and have the guest driver recover
2962+
* from the error. This requires that PCIe capabilities be
2963+
* exposed to the guest. For now, we just terminate the
2964+
* guest to contain the error.
2965+
*/
2966+
2967+
error_report("%s (%04x:%02x:%02x.%x)"
2968+
"Unrecoverable error detected...\n"
2969+
"Please collect any data possible and then kill the guest",
2970+
__func__, vdev->host.domain, vdev->host.bus,
2971+
vdev->host.slot, vdev->host.function);
2972+
2973+
vm_stop(RUN_STATE_IO_ERROR);
2974+
}
2975+
2976+
/*
2977+
* Registers error notifier for devices supporting error recovery.
2978+
* If we encounter a failure in this function, we report an error
2979+
* and continue after disabling error recovery support for the
2980+
* device.
2981+
*/
2982+
static void vfio_register_err_notifier(VFIODevice *vdev)
2983+
{
2984+
int ret;
2985+
int argsz;
2986+
struct vfio_irq_set *irq_set;
2987+
int32_t *pfd;
2988+
2989+
if (!vdev->pci_aer) {
2990+
return;
2991+
}
2992+
2993+
if (event_notifier_init(&vdev->err_notifier, 0)) {
2994+
error_report("vfio: Warning: "
2995+
"Unable to init event notifier for error detection\n");
2996+
vdev->pci_aer = false;
2997+
return;
2998+
}
2999+
3000+
argsz = sizeof(*irq_set) + sizeof(*pfd);
3001+
3002+
irq_set = g_malloc0(argsz);
3003+
irq_set->argsz = argsz;
3004+
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
3005+
VFIO_IRQ_SET_ACTION_TRIGGER;
3006+
irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
3007+
irq_set->start = 0;
3008+
irq_set->count = 1;
3009+
pfd = (int32_t *)&irq_set->data;
3010+
3011+
*pfd = event_notifier_get_fd(&vdev->err_notifier);
3012+
qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
3013+
3014+
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
3015+
if (ret) {
3016+
error_report("vfio: Failed to set up error notification\n");
3017+
qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
3018+
event_notifier_cleanup(&vdev->err_notifier);
3019+
vdev->pci_aer = false;
3020+
}
3021+
g_free(irq_set);
3022+
}
3023+
3024+
/*
 * Tears down the error notifier set up by vfio_register_err_notifier().
 * Does nothing when vdev->pci_aer is not set.
 *
 * The eventfd is de-assigned by issuing VFIO_DEVICE_SET_IRQS on
 * VFIO_PCI_ERR_IRQ_INDEX with an fd of -1.  A failure of that ioctl is
 * reported but does not stop the local cleanup: the fd handler is
 * removed and the notifier is destroyed regardless.
 */
static void vfio_unregister_err_notifier(VFIODevice *vdev)
{
    int argsz;
    struct vfio_irq_set *irq_set;
    int32_t *pfd;
    int ret;

    if (!vdev->pci_aer) {
        return;
    }

    /* The irq_set header is followed by a single eventfd slot as payload. */
    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
                     VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = -1;

    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
    if (ret) {
        /*
         * ioctl() only ever returns -1 on failure; the actual reason is
         * in errno, so report that (%m) instead of the return value.
         */
        error_report("vfio: Failed to de-assign error fd: %m");
    }
    g_free(irq_set);
    qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
                        NULL, NULL, vdev);
    event_notifier_cleanup(&vdev->err_notifier);
}
3056+
29343057
static int vfio_initfn(PCIDevice *pdev)
29353058
{
29363059
VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
@@ -3063,6 +3186,7 @@ static int vfio_initfn(PCIDevice *pdev)
30633186
}
30643187

30653188
add_boot_device_path(vdev->bootindex, &pdev->qdev, NULL);
3189+
vfio_register_err_notifier(vdev);
30663190

30673191
return 0;
30683192

@@ -3082,6 +3206,7 @@ static void vfio_exitfn(PCIDevice *pdev)
30823206
VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
30833207
VFIOGroup *group = vdev->group;
30843208

3209+
vfio_unregister_err_notifier(vdev);
30853210
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
30863211
vfio_disable_interrupts(vdev);
30873212
if (vdev->intx.mmap_timer) {

0 commit comments

Comments
 (0)