
Commit c0547d0

nhatsmrt authored and akpm00 committed
zsmalloc: consolidate zs_pool's migrate_lock and size_class's locks
Currently, zsmalloc has a hierarchy of locks, which includes a pool-level migrate_lock and a lock for each size class. We have to obtain both locks in the hotpath in most cases anyway, except for zs_malloc. This exception will no longer exist when we introduce an LRU into the zs_pool for the new writeback functionality - we will need to obtain a pool-level lock to synchronize LRU handling even in zs_malloc.

In preparation for zsmalloc writeback, consolidate these locks into a single pool-level lock, which drastically reduces the complexity of synchronization in zsmalloc.

We have also benchmarked the lock consolidation to see the performance effect of this change on zram.

First, we ran a synthetic FS workload on a server machine with 36 cores (same machine for all runs), using

fs_mark -d ../zram1mnt -s 100000 -n 2500 -t 32 -k

before and after for btrfs and ext4 on zram (FS usage is 80%). Here is the result (unit: files/second):

With lock consolidation (btrfs):
Average: 13520.2, Median: 13531.0, Stddev: 137.5961482019028

Without lock consolidation (btrfs):
Average: 13487.2, Median: 13575.0, Stddev: 309.08283679298665

With lock consolidation (ext4):
Average: 16824.4, Median: 16839.0, Stddev: 89.97388510006668

Without lock consolidation (ext4):
Average: 16958.0, Median: 16986.0, Stddev: 194.7370021336469

As you can see, we observe a 0.3% regression for btrfs and a 0.9% regression for ext4. This is a small, barely measurable difference in my opinion.

For a more realistic scenario, we also tried building the kernel on zram. Here is the time it takes (in seconds):

With lock consolidation (btrfs):
real - Average: 319.6, Median: 320.0, Stddev: 0.8944271909999159
user - Average: 6894.2, Median: 6895.0, Stddev: 25.528415540334656
sys - Average: 521.4, Median: 522.0, Stddev: 1.51657508881031

Without lock consolidation (btrfs):
real - Average: 319.8, Median: 320.0, Stddev: 0.8366600265340756
user - Average: 6896.6, Median: 6899.0, Stddev: 16.04057355583023
sys - Average: 520.6, Median: 521.0, Stddev: 1.140175425099138

With lock consolidation (ext4):
real - Average: 320.0, Median: 319.0, Stddev: 1.4142135623730951
user - Average: 6896.8, Median: 6878.0, Stddev: 28.621670111997307
sys - Average: 521.2, Median: 521.0, Stddev: 1.7888543819998317

Without lock consolidation (ext4):
real - Average: 319.6, Median: 319.0, Stddev: 0.8944271909999159
user - Average: 6886.2, Median: 6887.0, Stddev: 16.93221781102523
sys - Average: 520.4, Median: 520.0, Stddev: 1.140175425099138

The difference is entirely within the noise of a typical run on zram. This hardly justifies the complexity of maintaining both the pool lock and the class lock. In fact, for writeback, we would need to introduce yet another lock to prevent data races on the pool's LRU, further complicating the lock handling logic. IMHO, it is just better to collapse all of these into a single pool-level lock.

Link: https://lkml.kernel.org/r/20221128191616.1261026-4-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
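Editor's note: the locking change is easiest to see as a before/after shape. Below is a minimal, self-contained userspace sketch (plain C with pthreads, not kernel code) of the pattern the patch applies to the zs_free path in the diff further down: the old pool-level rwlock nested with a per-size-class lock collapses into a single pool-level lock. All names here (toy_pool, toy_class, toy_free_old, toy_free_new) are hypothetical stand-ins and only mirror the structure of the hunks.

/*
 * Toy userspace illustration of the lock consolidation (not kernel code).
 * Old scheme: a pool-level rwlock (migrate_lock) nested with a per-class
 * lock on the free path. New scheme: a single pool-level lock.
 */
#include <pthread.h>
#include <stdio.h>

struct toy_class {
	pthread_mutex_t lock;		/* old scheme: per size-class lock */
	int objs_used;
};

struct toy_pool {
	pthread_rwlock_t migrate_lock;	/* old scheme: pool-level rwlock */
	pthread_mutex_t lock;		/* new scheme: the single pool lock */
	struct toy_class class0;
};

/* Old shape of the free path: rwlock pins handle->page, then the class lock. */
static void toy_free_old(struct toy_pool *pool)
{
	pthread_rwlock_rdlock(&pool->migrate_lock);
	pthread_mutex_lock(&pool->class0.lock);
	pthread_rwlock_unlock(&pool->migrate_lock);
	pool->class0.objs_used--;	/* the actual free work */
	pthread_mutex_unlock(&pool->class0.lock);
}

/* New shape: one pool lock covers both migration and object alloc/free. */
static void toy_free_new(struct toy_pool *pool)
{
	pthread_mutex_lock(&pool->lock);
	pool->class0.objs_used--;	/* the actual free work */
	pthread_mutex_unlock(&pool->lock);
}

int main(void)
{
	struct toy_pool pool = {
		.migrate_lock = PTHREAD_RWLOCK_INITIALIZER,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.class0 = { .lock = PTHREAD_MUTEX_INITIALIZER, .objs_used = 2 },
	};

	toy_free_old(&pool);
	toy_free_new(&pool);
	printf("objs_used = %d\n", pool.class0.objs_used);	/* prints 0 */
	return 0;
}

The tradeoff is the one the benchmarks above quantify: a single pool lock admits less concurrency than a reader-writer lock plus per-class locks, but it simplifies lock ordering and leaves room to protect the upcoming pool-level LRU without adding yet another lock.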
1 parent 6a05aa3 commit c0547d0

File tree

1 file changed: +37 −50 lines


mm/zsmalloc.c

Lines changed: 37 additions & 50 deletions
@@ -33,8 +33,7 @@
 /*
  * lock ordering:
  *	page_lock
- *	pool->migrate_lock
- *	class->lock
+ *	pool->lock
  *	zspage->lock
  */
 
@@ -192,7 +191,6 @@ static const int fullness_threshold_frac = 4;
 static size_t huge_class_size;
 
 struct size_class {
-	spinlock_t lock;
 	struct list_head fullness_list[NR_ZS_FULLNESS];
 	/*
 	 * Size of objects stored in this class. Must be multiple
@@ -247,8 +245,7 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct work_struct free_work;
 #endif
-	/* protect page/zspage migration */
-	rwlock_t migrate_lock;
+	spinlock_t lock;
 };
 
 struct zspage {
@@ -355,7 +352,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
 	kmem_cache_free(pool->zspage_cachep, zspage);
 }
 
-/* class->lock(which owns the handle) synchronizes races */
+/* pool->lock(which owns the handle) synchronizes races */
 static void record_obj(unsigned long handle, unsigned long obj)
 {
 	*(unsigned long *)handle = obj;
@@ -452,7 +449,7 @@ static __maybe_unused int is_first_page(struct page *page)
 	return PagePrivate(page);
 }
 
-/* Protected by class->lock */
+/* Protected by pool->lock */
 static inline int get_zspage_inuse(struct zspage *zspage)
 {
 	return zspage->inuse;
@@ -597,13 +594,13 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
 		class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
 		obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
 		obj_used = zs_stat_get(class, OBJ_USED);
 		freeable = zs_can_compact(class);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 
 		objs_per_zspage = class->objs_per_zspage;
 		pages_used = obj_allocated / objs_per_zspage *
@@ -916,7 +913,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 
 	get_zspage_mapping(zspage, &class_idx, &fg);
 
-	assert_spin_locked(&class->lock);
+	assert_spin_locked(&pool->lock);
 
 	VM_BUG_ON(get_zspage_inuse(zspage));
 	VM_BUG_ON(fg != ZS_EMPTY);
@@ -1268,19 +1265,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	BUG_ON(in_interrupt());
 
 	/* It guarantees it can get zspage from handle safely */
-	read_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &page, &obj_idx);
 	zspage = get_zspage(page);
 
 	/*
-	 * migration cannot move any zpages in this zspage. Here, class->lock
+	 * migration cannot move any zpages in this zspage. Here, pool->lock
 	 * is too heavy since callers would take some time until they calls
 	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
 	migrate_read_lock(zspage);
-	read_unlock(&pool->migrate_lock);
+	spin_unlock(&pool->lock);
 
 	class = zspage_class(pool, zspage);
 	off = (class->size * obj_idx) & ~PAGE_MASK;
@@ -1433,29 +1430,29 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 	size += ZS_HANDLE_SIZE;
 	class = pool->size_class[get_size_class_index(size)];
 
-	/* class->lock effectively protects the zpage migration */
-	spin_lock(&class->lock);
+	/* pool->lock effectively protects the zpage migration */
+	spin_lock(&pool->lock);
 	zspage = find_get_zspage(class);
 	if (likely(zspage)) {
 		obj = obj_malloc(pool, zspage, handle);
 		/* Now move the zspage to another fullness group, if required */
 		fix_fullness_group(class, zspage);
 		record_obj(handle, obj);
 		class_stat_inc(class, OBJ_USED, 1);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 
 		return handle;
 	}
 
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 
 	zspage = alloc_zspage(pool, class, gfp);
 	if (!zspage) {
 		cache_free_handle(pool, handle);
 		return (unsigned long)ERR_PTR(-ENOMEM);
 	}
 
-	spin_lock(&class->lock);
+	spin_lock(&pool->lock);
 	obj = obj_malloc(pool, zspage, handle);
 	newfg = get_fullness_group(class, zspage);
 	insert_zspage(class, zspage, newfg);
@@ -1468,7 +1465,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 
 	/* We completely set up zspage so mark them as movable */
 	SetZsPageMovable(pool, zspage);
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 
 	return handle;
 }
@@ -1512,16 +1509,14 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 		return;
 
 	/*
-	 * The pool->migrate_lock protects the race with zpage's migration
+	 * The pool->lock protects the race with zpage's migration
 	 * so it's safe to get the page from handle.
 	 */
-	read_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	obj = handle_to_obj(handle);
 	obj_to_page(obj, &f_page);
 	zspage = get_zspage(f_page);
 	class = zspage_class(pool, zspage);
-	spin_lock(&class->lock);
-	read_unlock(&pool->migrate_lock);
 
 	obj_free(class->size, obj);
 	class_stat_dec(class, OBJ_USED, 1);
@@ -1531,7 +1526,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 
 	free_zspage(pool, class, zspage);
 out:
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 	cache_free_handle(pool, handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
@@ -1888,16 +1883,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	pool = zspage->pool;
 
 	/*
-	 * The pool migrate_lock protects the race between zpage migration
+	 * The pool's lock protects the race between zpage migration
 	 * and zs_free.
 	 */
-	write_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	class = zspage_class(pool, zspage);
 
-	/*
-	 * the class lock protects zpage alloc/free in the zspage.
-	 */
-	spin_lock(&class->lock);
 	/* the migrate_write_lock protects zpage access via zs_map_object */
 	migrate_write_lock(zspage);
 
@@ -1927,10 +1918,9 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	replace_sub_page(class, zspage, newpage, page);
 	/*
 	 * Since we complete the data copy and set up new zspage structure,
-	 * it's okay to release migration_lock.
+	 * it's okay to release the pool's lock.
 	 */
-	write_unlock(&pool->migrate_lock);
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 	dec_zspage_isolation(zspage);
 	migrate_write_unlock(zspage);
 
@@ -1985,9 +1975,9 @@ static void async_free_zspage(struct work_struct *work)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 	}
 
 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
@@ -1997,9 +1987,9 @@ static void async_free_zspage(struct work_struct *work)
 		get_zspage_mapping(zspage, &class_idx, &fullness);
 		VM_BUG_ON(fullness != ZS_EMPTY);
 		class = pool->size_class[class_idx];
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		__free_zspage(pool, class, zspage);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 	}
 };
 
@@ -2060,10 +2050,11 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 	struct zspage *dst_zspage = NULL;
 	unsigned long pages_freed = 0;
 
-	/* protect the race between zpage migration and zs_free */
-	write_lock(&pool->migrate_lock);
-	/* protect zpage allocation/free */
-	spin_lock(&class->lock);
+	/*
+	 * protect the race between zpage migration and zs_free
+	 * as well as zpage allocation/free
+	 */
+	spin_lock(&pool->lock);
 	while ((src_zspage = isolate_zspage(class, true))) {
 		/* protect someone accessing the zspage(i.e., zs_map_object) */
 		migrate_write_lock(src_zspage);
@@ -2088,7 +2079,7 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 			putback_zspage(class, dst_zspage);
 			migrate_write_unlock(dst_zspage);
 			dst_zspage = NULL;
-			if (rwlock_is_contended(&pool->migrate_lock))
+			if (spin_is_contended(&pool->lock))
 				break;
 		}
 
@@ -2105,20 +2096,17 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 			pages_freed += class->pages_per_zspage;
 		} else
 			migrate_write_unlock(src_zspage);
-		spin_unlock(&class->lock);
-		write_unlock(&pool->migrate_lock);
+		spin_unlock(&pool->lock);
 		cond_resched();
-		write_lock(&pool->migrate_lock);
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 	}
 
 	if (src_zspage) {
 		putback_zspage(class, src_zspage);
 		migrate_write_unlock(src_zspage);
 	}
 
-	spin_unlock(&class->lock);
-	write_unlock(&pool->migrate_lock);
+	spin_unlock(&pool->lock);
 
 	return pages_freed;
 }
@@ -2221,7 +2209,7 @@ struct zs_pool *zs_create_pool(const char *name)
 		return NULL;
 
 	init_deferred_free(pool);
-	rwlock_init(&pool->migrate_lock);
+	spin_lock_init(&pool->lock);
 
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
@@ -2292,7 +2280,6 @@ struct zs_pool *zs_create_pool(const char *name)
 		class->index = i;
 		class->pages_per_zspage = pages_per_zspage;
 		class->objs_per_zspage = objs_per_zspage;
-		spin_lock_init(&class->lock);
 		pool->size_class[i] = class;
 		for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
 				fullness++)
