Skip to content

Commit c2afb1c

Browse files
davidhildenbrand authored and sfrothwell committed
virtio-mem: kdump mode to sanitize /proc/vmcore access
Although virtio-mem currently supports reading unplugged memory in the hypervisor, this will change in the future, indicated to the device via a new feature flag. We similarly sanitized /proc/kcore access recently. [1] Let's register a vmcore callback, to allow vmcore code to check if a PFN belonging to a virtio-mem device is either currently plugged and should be dumped or is currently unplugged and should not be accessed, instead mapping the shared zeropage or returning zeroes when reading. This is important when not capturing /proc/vmcore via tools like "makedumpfile" that can identify logically unplugged virtio-mem memory via PG_offline in the memmap, but simply by e.g., copying the file. Distributions that support virtio-mem+kdump have to make sure that the virtio_mem module will be part of the kdump kernel or the kdump initrd; dracut was recently [2] extended to include virtio-mem in the generated initrd. As long as no special kdump kernels are used, this will automatically make sure that virtio-mem will be around in the kdump initrd and sanitize /proc/vmcore access -- with dracut. With this series, we'll send one virtio-mem state request for every ~2 MiB chunk of virtio-mem memory indicated in the vmcore that we intend to read/map. In the future, we might want to allow building virtio-mem for kdump mode only, even without CONFIG_MEMORY_HOTPLUG and friends: this way, we could support special stripped-down kdump kernels that have many other config options disabled; we'll tackle that once required. Further, we might want to try sensing bigger blocks (e.g., memory sections) first before falling back to device blocks on demand. Tested with Fedora rawhide, which contains a recent kexec-tools version (considering "System RAM (virtio_mem)" when creating the vmcore header) and a recent dracut version (including the virtio_mem module in the kdump initrd). 
[1] https://lkml.kernel.org/r/20210526093041.8800-1-david@redhat.com [2] dracutdevs/dracut#1157 Link: https://lkml.kernel.org/r/20211005121430.30136-10-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Cc: Baoquan He <bhe@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Young <dyoung@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jason Wang <jasowang@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
1 parent 904306c commit c2afb1c

File tree

1 file changed

+124
-12
lines changed

1 file changed

+124
-12
lines changed

drivers/virtio/virtio_mem.c

Lines changed: 124 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,19 @@ struct virtio_mem {
223223
* When this lock is held the pointers can't change, ONLINE and
224224
* OFFLINE blocks can't change the state and no subblocks will get
225225
* plugged/unplugged.
226+
*
227+
* In kdump mode, used to serialize requests, last_block_addr and
228+
* last_block_plugged.
226229
*/
227230
struct mutex hotplug_mutex;
228231
bool hotplug_active;
229232

230233
/* An error occurred we cannot handle - stop processing requests. */
231234
bool broken;
232235

236+
/* Cached value of is_kdump_kernel() when the device was probed. */
237+
bool in_kdump;
238+
233239
/* The driver is being removed. */
234240
spinlock_t removal_lock;
235241
bool removing;
@@ -243,6 +249,13 @@ struct virtio_mem {
243249
/* Memory notifier (online/offline events). */
244250
struct notifier_block memory_notifier;
245251

252+
#ifdef CONFIG_PROC_VMCORE
253+
/* vmcore callback for /proc/vmcore handling in kdump mode */
254+
struct vmcore_cb vmcore_cb;
255+
uint64_t last_block_addr;
256+
bool last_block_plugged;
257+
#endif /* CONFIG_PROC_VMCORE */
258+
246259
/* Next device in the list of virtio-mem devices. */
247260
struct list_head next;
248261
};
@@ -2293,6 +2306,12 @@ static void virtio_mem_run_wq(struct work_struct *work)
22932306
uint64_t diff;
22942307
int rc;
22952308

2309+
if (unlikely(vm->in_kdump)) {
2310+
dev_warn_once(&vm->vdev->dev,
2311+
"unexpected workqueue run in kdump kernel\n");
2312+
return;
2313+
}
2314+
22962315
hrtimer_cancel(&vm->retry_timer);
22972316

22982317
if (vm->broken)
@@ -2521,6 +2540,86 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
25212540
return rc;
25222541
}
25232542

2543+
#ifdef CONFIG_PROC_VMCORE
2544+
/*
 * Query the device for the state of the range starting at @addr and
 * spanning @size bytes.
 *
 * The range is passed to the device as a start address plus a number of
 * device blocks (@size divided by vm->device_block_size).
 *
 * Returns the state reported by the device when the request is acked,
 * -EINVAL when the device answers with VIRTIO_MEM_RESP_ERROR and -ENOMEM
 * for any other response.
 */
static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr,
					 uint64_t size)
{
	const uint64_t nb_blocks = size / vm->device_block_size;
	const struct virtio_mem_req req = {
		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE),
		.u.state.addr = cpu_to_virtio64(vm->vdev, addr),
		.u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_blocks),
	};
	int rc;

	dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr,
		addr + size - 1);

	switch (virtio_mem_send_request(vm, &req)) {
	case VIRTIO_MEM_RESP_ACK:
		/* Success: hand the reported state straight back. */
		return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state);
	case VIRTIO_MEM_RESP_ERROR:
		rc = -EINVAL;
		break;
	default:
		/* Every other response is reported as -ENOMEM. */
		rc = -ENOMEM;
		break;
	}

	dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc);
	return rc;
}
2571+
2572+
static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
2573+
unsigned long pfn)
2574+
{
2575+
struct virtio_mem *vm = container_of(cb, struct virtio_mem,
2576+
vmcore_cb);
2577+
uint64_t addr = PFN_PHYS(pfn);
2578+
bool is_ram;
2579+
int rc;
2580+
2581+
if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE))
2582+
return true;
2583+
if (!vm->plugged_size)
2584+
return false;
2585+
2586+
/*
2587+
* We have to serialize device requests and access to the information
2588+
* about the block queried last.
2589+
*/
2590+
mutex_lock(&vm->hotplug_mutex);
2591+
2592+
addr = ALIGN_DOWN(addr, vm->device_block_size);
2593+
if (addr != vm->last_block_addr) {
2594+
rc = virtio_mem_send_state_request(vm, addr,
2595+
vm->device_block_size);
2596+
/* On any kind of error, we're going to signal !ram. */
2597+
if (rc == VIRTIO_MEM_STATE_PLUGGED)
2598+
vm->last_block_plugged = true;
2599+
else
2600+
vm->last_block_plugged = false;
2601+
vm->last_block_addr = addr;
2602+
}
2603+
2604+
is_ram = vm->last_block_plugged;
2605+
mutex_unlock(&vm->hotplug_mutex);
2606+
return is_ram;
2607+
}
2608+
#endif /* CONFIG_PROC_VMCORE */
2609+
2610+
static int virtio_mem_init_kdump(struct virtio_mem *vm)
2611+
{
2612+
#ifdef CONFIG_PROC_VMCORE
2613+
dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
2614+
vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
2615+
register_vmcore_cb(&vm->vmcore_cb);
2616+
return 0;
2617+
#else /* CONFIG_PROC_VMCORE */
2618+
dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n");
2619+
return -EBUSY;
2620+
#endif /* CONFIG_PROC_VMCORE */
2621+
}
2622+
25242623
static int virtio_mem_init(struct virtio_mem *vm)
25252624
{
25262625
uint16_t node_id;
@@ -2530,15 +2629,6 @@ static int virtio_mem_init(struct virtio_mem *vm)
25302629
return -EINVAL;
25312630
}
25322631

2533-
/*
2534-
* We don't want to (un)plug or reuse any memory when in kdump. The
2535-
* memory is still accessible (but not mapped).
2536-
*/
2537-
if (is_kdump_kernel()) {
2538-
dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n");
2539-
return -EBUSY;
2540-
}
2541-
25422632
/* Fetch all properties that can't change. */
25432633
virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
25442634
&vm->plugged_size);
@@ -2562,6 +2652,12 @@ static int virtio_mem_init(struct virtio_mem *vm)
25622652
if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
25632653
dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
25642654

2655+
/*
2656+
* We don't want to (un)plug or reuse any memory when in kdump. The
2657+
* memory is still accessible (but not exposed to Linux).
2658+
*/
2659+
if (vm->in_kdump)
2660+
return virtio_mem_init_kdump(vm);
25652661
return virtio_mem_init_hotplug(vm);
25662662
}
25672663

@@ -2640,6 +2736,7 @@ static int virtio_mem_probe(struct virtio_device *vdev)
26402736
hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
26412737
vm->retry_timer.function = virtio_mem_timer_expired;
26422738
vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
2739+
vm->in_kdump = is_kdump_kernel();
26432740

26442741
/* register the virtqueue */
26452742
rc = virtio_mem_init_vq(vm);
@@ -2654,8 +2751,10 @@ static int virtio_mem_probe(struct virtio_device *vdev)
26542751
virtio_device_ready(vdev);
26552752

26562753
/* trigger a config update to start processing the requested_size */
2657-
atomic_set(&vm->config_changed, 1);
2658-
queue_work(system_freezable_wq, &vm->wq);
2754+
if (!vm->in_kdump) {
2755+
atomic_set(&vm->config_changed, 1);
2756+
queue_work(system_freezable_wq, &vm->wq);
2757+
}
26592758

26602759
return 0;
26612760
out_del_vq:
@@ -2732,11 +2831,21 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm)
27322831
}
27332832
}
27342833

2834+
/*
 * Tear down kdump mode: unregister the vmcore callback of this device.
 * Does nothing when built without CONFIG_PROC_VMCORE.
 */
static void virtio_mem_deinit_kdump(struct virtio_mem *vm)
{
#if defined(CONFIG_PROC_VMCORE)
	unregister_vmcore_cb(&vm->vmcore_cb);
#endif /* CONFIG_PROC_VMCORE */
}
2840+
27352841
static void virtio_mem_remove(struct virtio_device *vdev)
27362842
{
27372843
struct virtio_mem *vm = vdev->priv;
27382844

2739-
virtio_mem_deinit_hotplug(vm);
2845+
if (vm->in_kdump)
2846+
virtio_mem_deinit_kdump(vm);
2847+
else
2848+
virtio_mem_deinit_hotplug(vm);
27402849

27412850
/* reset the device and cleanup the queues */
27422851
vdev->config->reset(vdev);
@@ -2750,6 +2859,9 @@ static void virtio_mem_config_changed(struct virtio_device *vdev)
27502859
{
27512860
struct virtio_mem *vm = vdev->priv;
27522861

2862+
if (unlikely(vm->in_kdump))
2863+
return;
2864+
27532865
atomic_set(&vm->config_changed, 1);
27542866
virtio_mem_retry(vm);
27552867
}

0 commit comments

Comments
 (0)