Skip to content

Commit 06152b8

Browse files
RocheWilliamxzpeter
authored andcommitted
migration: prevent migration when VM has poisoned memory
A memory page poisoned from the hypervisor level is no longer readable. The migration of a VM will crash Qemu when it tries to read the memory address space and stumbles on the poisoned page with a similar stack trace: Program terminated with signal SIGBUS, Bus error. #0 _mm256_loadu_si256 #1 buffer_zero_avx2 qemu#2 select_accel_fn qemu#3 buffer_is_zero qemu#4 save_zero_page qemu#5 ram_save_target_page_legacy qemu#6 ram_save_host_page qemu#7 ram_find_and_save_block qemu#8 ram_save_iterate qemu#9 qemu_savevm_state_iterate qemu#10 migration_iteration_run qemu#11 migration_thread qemu#12 qemu_thread_start To avoid this VM crash during the migration, prevent the migration when a known hardware poison exists on the VM. Signed-off-by: William Roche <william.roche@oracle.com> Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com Signed-off-by: Peter Xu <peterx@redhat.com>
1 parent 39a6e4f commit 06152b8

File tree

4 files changed

+28
-0
lines changed

4 files changed

+28
-0
lines changed

accel/kvm/kvm-all.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
11191119
return ret;
11201120
}
11211121

1122+
/*
1123+
* We track the poisoned pages to be able to:
1124+
* - replace them on VM reset
1125+
* - block a migration for a VM with a poisoned page
1126+
*/
11221127
typedef struct HWPoisonPage {
11231128
ram_addr_t ram_addr;
11241129
QLIST_ENTRY(HWPoisonPage) list;
@@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
11521157
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
11531158
}
11541159

1160+
bool kvm_hwpoisoned_mem(void)
1161+
{
1162+
return !QLIST_EMPTY(&hwpoison_page_list);
1163+
}
1164+
11551165
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
11561166
{
11571167
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN

accel/stubs/kvm-stub.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
124124
{
125125
return 0;
126126
}
127+
128+
bool kvm_hwpoisoned_mem(void)
129+
{
130+
return false;
131+
}

include/sysemu/kvm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void);
538538
bool kvm_dirty_ring_enabled(void);
539539

540540
uint32_t kvm_dirty_ring_size(void);
541+
542+
/**
543+
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
544+
* reported for the VM.
545+
*/
546+
bool kvm_hwpoisoned_mem(void);
541547
#endif

migration/migration.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include "options.h"
6868
#include "sysemu/dirtylimit.h"
6969
#include "qemu/sockets.h"
70+
#include "sysemu/kvm.h"
7071

7172
static NotifierList migration_state_notifiers =
7273
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -1906,6 +1907,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
19061907
return false;
19071908
}
19081909

1910+
if (kvm_hwpoisoned_mem()) {
1911+
error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
1912+
"please reboot the vm and try again");
1913+
return false;
1914+
}
1915+
19091916
if (migration_is_blocked(errp)) {
19101917
return false;
19111918
}

0 commit comments

Comments
 (0)