Skip to content

Commit ac24307

Browse files
minchankhnaz
authored and committed
zram: fix lockdep warning of free block handling
Patch series "zram idle page writeback", v3. Inherently, swap device has many idle pages which are rare touched since it was allocated. It is never problem if we use storage device as swap. However, it's just waste for zram-swap. This patchset supports zram idle page writeback feature. * Admin can define what is idle page "no access since X time ago" * Admin can define when zram should writeback them * Admin can define when zram should stop writeback to prevent wearout Details are in each patch's description. This patch (of 7): [ 254.519728] ================================ [ 254.520311] WARNING: inconsistent lock state [ 254.520898] 4.19.0+ torvalds#390 Not tainted [ 254.521387] -------------------------------- [ 254.521732] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 254.521732] zram_verify/2095 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 254.521732] 00000000b1828693 (&(&zram->bitmap_lock)->rlock){+.?.}, at: put_entry_bdev+0x1e/0x50 [ 254.521732] {SOFTIRQ-ON-W} state was registered at: [ 254.521732] _raw_spin_lock+0x2c/0x40 [ 254.521732] zram_make_request+0x755/0xdc9 [ 254.521732] generic_make_request+0x373/0x6a0 [ 254.521732] submit_bio+0x6c/0x140 [ 254.521732] __swap_writepage+0x3a8/0x480 [ 254.521732] shrink_page_list+0x1102/0x1a60 [ 254.521732] shrink_inactive_list+0x21b/0x3f0 [ 254.521732] shrink_node_memcg.constprop.99+0x4f8/0x7e0 [ 254.521732] shrink_node+0x7d/0x2f0 [ 254.521732] do_try_to_free_pages+0xe0/0x300 [ 254.521732] try_to_free_pages+0x116/0x2b0 [ 254.521732] __alloc_pages_slowpath+0x3f4/0xf80 [ 254.521732] __alloc_pages_nodemask+0x2a2/0x2f0 [ 254.521732] __handle_mm_fault+0x42e/0xb50 [ 254.521732] handle_mm_fault+0x55/0xb0 [ 254.521732] __do_page_fault+0x235/0x4b0 [ 254.521732] page_fault+0x1e/0x30 [ 254.521732] irq event stamp: 228412 [ 254.521732] hardirqs last enabled at (228412): [<ffffffff98245846>] __slab_free+0x3e6/0x600 [ 254.521732] hardirqs last disabled at (228411): [<ffffffff98245625>] __slab_free+0x1c5/0x600 [ 254.521732] softirqs 
last enabled at (228396): [<ffffffff98e0031e>] __do_softirq+0x31e/0x427 [ 254.521732] softirqs last disabled at (228403): [<ffffffff98072051>] irq_exit+0xd1/0xe0 [ 254.521732] [ 254.521732] other info that might help us debug this: [ 254.521732] Possible unsafe locking scenario: [ 254.521732] [ 254.521732] CPU0 [ 254.521732] ---- [ 254.521732] lock(&(&zram->bitmap_lock)->rlock); [ 254.521732] <Interrupt> [ 254.521732] lock(&(&zram->bitmap_lock)->rlock); [ 254.521732] [ 254.521732] *** DEADLOCK *** [ 254.521732] [ 254.521732] no locks held by zram_verify/2095. [ 254.521732] [ 254.521732] stack backtrace: [ 254.521732] CPU: 5 PID: 2095 Comm: zram_verify Not tainted 4.19.0+ torvalds#390 [ 254.521732] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 254.521732] Call Trace: [ 254.521732] <IRQ> [ 254.521732] dump_stack+0x67/0x9b [ 254.521732] print_usage_bug+0x1bd/0x1d3 [ 254.521732] mark_lock+0x4aa/0x540 [ 254.521732] ? check_usage_backwards+0x160/0x160 [ 254.521732] __lock_acquire+0x51d/0x1300 [ 254.521732] ? free_debug_processing+0x24e/0x400 [ 254.521732] ? bio_endio+0x6d/0x1a0 [ 254.521732] ? lockdep_hardirqs_on+0x9b/0x180 [ 254.521732] ? lock_acquire+0x90/0x180 [ 254.521732] lock_acquire+0x90/0x180 [ 254.521732] ? put_entry_bdev+0x1e/0x50 [ 254.521732] _raw_spin_lock+0x2c/0x40 [ 254.521732] ? put_entry_bdev+0x1e/0x50 [ 254.521732] put_entry_bdev+0x1e/0x50 [ 254.521732] zram_free_page+0xf6/0x110 [ 254.521732] zram_slot_free_notify+0x42/0xa0 [ 254.521732] end_swap_bio_read+0x5b/0x170 [ 254.521732] blk_update_request+0x8f/0x340 [ 254.521732] scsi_end_request+0x2c/0x1e0 [ 254.521732] scsi_io_completion+0x98/0x650 [ 254.521732] blk_done_softirq+0x9e/0xd0 [ 254.521732] __do_softirq+0xcc/0x427 [ 254.521732] irq_exit+0xd1/0xe0 [ 254.521732] do_IRQ+0x93/0x120 [ 254.521732] common_interrupt+0xf/0xf [ 254.521732] </IRQ> With writeback feature, zram_slot_free_notify could be called in softirq context by end_swap_bio_read. 
However, bitmap_lock is not aware of that so lockdep yells out. Thanks. get_entry_bdev spin_lock(bitmap->lock); irq softirq end_swap_bio_read zram_slot_free_notify zram_slot_lock <-- deadlock prone zram_free_page put_entry_bdev spin_lock(bitmap->lock); <-- deadlock prone With akpm's suggestion (i.e. bitmap operation is already atomic), we could remove the bitmap lock. It might fail to find an empty slot if serious contention happens. However, it's not a severe problem because huge page writeback already has the possibility of failing if there is severe memory pressure. The worst case is just keeping the incompressible page in memory, not storage. The other problem is zram_slot_lock in zram_slot_free_notify. To make it safe, this patch introduces zram_slot_trylock, which zram_slot_free_notify uses. Although it's rare to be contended, this patch adds a new debug stat "miss_free" to keep monitoring how often it happens. Link: http://lkml.kernel.org/r/20181127055429.251614-2-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Reviewed-by: Joey Pabalinas <joeypabalinas@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 9b24d68 commit ac24307

File tree

2 files changed

+22
-18
lines changed

2 files changed

+22
-18
lines changed

drivers/block/zram/zram_drv.c

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ static size_t huge_class_size;
5353

5454
static void zram_free_page(struct zram *zram, size_t index);
5555

56+
static int zram_slot_trylock(struct zram *zram, u32 index)
57+
{
58+
return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].value);
59+
}
60+
5661
static void zram_slot_lock(struct zram *zram, u32 index)
5762
{
5863
bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
@@ -399,7 +404,6 @@ static ssize_t backing_dev_store(struct device *dev,
399404
goto out;
400405

401406
reset_bdev(zram);
402-
spin_lock_init(&zram->bitmap_lock);
403407

404408
zram->old_block_size = old_block_size;
405409
zram->bdev = bdev;
@@ -443,29 +447,24 @@ static ssize_t backing_dev_store(struct device *dev,
443447

444448
static unsigned long get_entry_bdev(struct zram *zram)
445449
{
446-
unsigned long entry;
447-
448-
spin_lock(&zram->bitmap_lock);
450+
unsigned long blk_idx = 1;
451+
retry:
449452
/* skip 0 bit to confuse zram.handle = 0 */
450-
entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
451-
if (entry == zram->nr_pages) {
452-
spin_unlock(&zram->bitmap_lock);
453+
blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
454+
if (blk_idx == zram->nr_pages)
453455
return 0;
454-
}
455456

456-
set_bit(entry, zram->bitmap);
457-
spin_unlock(&zram->bitmap_lock);
457+
if (test_and_set_bit(blk_idx, zram->bitmap))
458+
goto retry;
458459

459-
return entry;
460+
return blk_idx;
460461
}
461462

462463
static void put_entry_bdev(struct zram *zram, unsigned long entry)
463464
{
464465
int was_set;
465466

466-
spin_lock(&zram->bitmap_lock);
467467
was_set = test_and_clear_bit(entry, zram->bitmap);
468-
spin_unlock(&zram->bitmap_lock);
469468
WARN_ON_ONCE(!was_set);
470469
}
471470

@@ -886,9 +885,10 @@ static ssize_t debug_stat_show(struct device *dev,
886885

887886
down_read(&zram->init_lock);
888887
ret = scnprintf(buf, PAGE_SIZE,
889-
"version: %d\n%8llu\n",
888+
"version: %d\n%8llu %8llu\n",
890889
version,
891-
(u64)atomic64_read(&zram->stats.writestall));
890+
(u64)atomic64_read(&zram->stats.writestall),
891+
(u64)atomic64_read(&zram->stats.miss_free));
892892
up_read(&zram->init_lock);
893893

894894
return ret;
@@ -1400,10 +1400,14 @@ static void zram_slot_free_notify(struct block_device *bdev,
14001400

14011401
zram = bdev->bd_disk->private_data;
14021402

1403-
zram_slot_lock(zram, index);
1403+
atomic64_inc(&zram->stats.notify_free);
1404+
if (!zram_slot_trylock(zram, index)) {
1405+
atomic64_inc(&zram->stats.miss_free);
1406+
return;
1407+
}
1408+
14041409
zram_free_page(zram, index);
14051410
zram_slot_unlock(zram, index);
1406-
atomic64_inc(&zram->stats.notify_free);
14071411
}
14081412

14091413
static int zram_rw_page(struct block_device *bdev, sector_t sector,

drivers/block/zram/zram_drv.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct zram_stats {
7979
atomic64_t pages_stored; /* no. of pages currently stored */
8080
atomic_long_t max_used_pages; /* no. of maximum pages stored */
8181
atomic64_t writestall; /* no. of write slow paths */
82+
atomic64_t miss_free; /* no. of missed free */
8283
};
8384

8485
struct zram {
@@ -110,7 +111,6 @@ struct zram {
110111
unsigned int old_block_size;
111112
unsigned long *bitmap;
112113
unsigned long nr_pages;
113-
spinlock_t bitmap_lock;
114114
#endif
115115
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
116116
struct dentry *debugfs_dir;

0 commit comments

Comments
 (0)