Skip to content

Commit 8fc7564

Browse files
Matthew Wilcox (Oracle) authored and Linus Torvalds committed
XArray: add xas_split
In order to use multi-index entries for huge pages in the page cache, we need to be able to split a multi-index entry (e.g. if a file is truncated in the middle of a huge page entry).  This version does not support splitting more than one level of the tree at a time.  This is an acceptable limitation for the page cache as we do not expect to support order-12 pages in the near future.

[akpm@linux-foundation.org: export xas_split_alloc() to modules]
[willy@infradead.org: fix xarray split]
  Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
[willy@infradead.org: fix xarray]
  Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 57417ce commit 8fc7564

File tree

4 files changed

+225
-16
lines changed

4 files changed

+225
-16
lines changed

Documentation/core-api/xarray.rst

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
475475
Each entry will only be returned once, no matter how many indices it
476476
occupies.
477477

478-
Using xas_next() or xas_prev() with a multi-index xa_state
479-
is not supported. Using either of these functions on a multi-index entry
480-
will reveal sibling entries; these should be skipped over by the caller.
481-
482-
Storing ``NULL`` into any index of a multi-index entry will set the entry
483-
at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
484-
entry into entries occupying smaller ranges is not yet supported.
478+
Using xas_next() or xas_prev() with a multi-index xa_state is not
479+
supported. Using either of these functions on a multi-index entry will
480+
reveal sibling entries; these should be skipped over by the caller.
481+
482+
Storing ``NULL`` into any index of a multi-index entry will set the
483+
entry at every index to ``NULL`` and dissolve the tie. A multi-index
484+
entry can be split into entries occupying smaller ranges by calling
485+
xas_split_alloc() without the xa_lock held, followed by taking the lock
486+
and calling xas_split().
485487

486488
Functions and structures
487489
========================

include/linux/xarray.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *);
15071507

15081508
#ifdef CONFIG_XARRAY_MULTI
15091509
int xa_get_order(struct xarray *, unsigned long index);
1510+
void xas_split(struct xa_state *, void *entry, unsigned int order);
1511+
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
15101512
#else
15111513
static inline int xa_get_order(struct xarray *xa, unsigned long index)
15121514
{
15131515
return 0;
15141516
}
1517+
1518+
static inline void xas_split(struct xa_state *xas, void *entry,
1519+
unsigned int order)
1520+
{
1521+
xas_store(xas, entry);
1522+
}
1523+
1524+
static inline void xas_split_alloc(struct xa_state *xas, void *entry,
1525+
unsigned int order, gfp_t gfp)
1526+
{
1527+
}
15151528
#endif
15161529

15171530
/**

lib/test_xarray.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa)
15031503
}
15041504
}
15051505

1506+
#ifdef CONFIG_XARRAY_MULTI
/*
 * Store one order-@order entry at @index, split it down to order-0
 * entries, then verify that every resulting index holds the same value
 * and that marks can still be set afterwards.
 */
static void check_split_1(struct xarray *xa, unsigned long index,
				unsigned int order)
{
	XA_STATE(xas, xa, index);
	void *entry;
	unsigned int found = 0;

	xa_store_order(xa, index, order, xa, GFP_KERNEL);

	/* Allocation may sleep, so it happens before taking the lock. */
	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
	xas_lock(&xas);
	xas_split(&xas, xa, order);
	xas_unlock(&xas);

	/* The split must produce exactly 1 << order identical entries. */
	xa_for_each(xa, index, entry) {
		XA_BUG_ON(xa, entry != xa);
		found++;
	}
	XA_BUG_ON(xa, found != 1 << order);

	xa_set_mark(xa, index, XA_MARK_0);
	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));

	xa_destroy(xa);
}
1532+
1533+
static noinline void check_split(struct xarray *xa)
1534+
{
1535+
unsigned int order;
1536+
1537+
XA_BUG_ON(xa, !xa_empty(xa));
1538+
1539+
for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
1540+
check_split_1(xa, 0, order);
1541+
check_split_1(xa, 1UL << order, order);
1542+
check_split_1(xa, 3UL << order, order);
1543+
}
1544+
}
1545+
#else
1546+
static void check_split(struct xarray *xa) { }
1547+
#endif
1548+
15061549
static void check_align_1(struct xarray *xa, char *name)
15071550
{
15081551
int i;
@@ -1729,6 +1772,7 @@ static int xarray_checks(void)
17291772
check_store_range(&array);
17301773
check_store_iter(&array);
17311774
check_align(&xa0);
1775+
check_split(&array);
17321776

17331777
check_workingset(&array, 0);
17341778
check_workingset(&array, 64);

lib/xarray.c

Lines changed: 159 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node)
266266
*/
267267
static void xas_destroy(struct xa_state *xas)
268268
{
269-
struct xa_node *node = xas->xa_alloc;
269+
struct xa_node *next, *node = xas->xa_alloc;
270270

271-
if (!node)
272-
return;
273-
XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
274-
kmem_cache_free(radix_tree_node_cachep, node);
275-
xas->xa_alloc = NULL;
271+
while (node) {
272+
XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
273+
next = rcu_dereference_raw(node->parent);
274+
radix_tree_node_rcu_free(&node->rcu_head);
275+
xas->xa_alloc = node = next;
276+
}
276277
}
277278

278279
/**
@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
304305
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
305306
if (!xas->xa_alloc)
306307
return false;
308+
xas->xa_alloc->parent = NULL;
307309
XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
308310
xas->xa_node = XAS_RESTART;
309311
return true;
@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
339341
}
340342
if (!xas->xa_alloc)
341343
return false;
344+
xas->xa_alloc->parent = NULL;
342345
XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
343346
xas->xa_node = XAS_RESTART;
344347
return true;
@@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas)
403406
/*
404407
* Use this to calculate the maximum index that will need to be created
405408
* in order to add the entry described by @xas. Because we cannot store a
406-
* multiple-index entry at index 0, the calculation is a little more complex
409+
* multi-index entry at index 0, the calculation is a little more complex
407410
* than you might expect.
408411
*/
409412
static unsigned long xas_max(struct xa_state *xas)
@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas)
946949
}
947950
EXPORT_SYMBOL_GPL(xas_init_marks);
948951

952+
#ifdef CONFIG_XARRAY_MULTI
/*
 * Collect all marks set at @offset in @node into a bitmask (one bit per
 * xa_mark_t) so they can be replayed onto the replacement entries after
 * a split.
 */
static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
{
	unsigned int marks = 0;
	xa_mark_t mark = XA_MARK_0;

	for (;;) {
		if (node_get_mark(node, offset, mark))
			marks |= 1 << (__force unsigned int)mark;
		if (mark == XA_MARK_MAX)
			break;
		mark_inc(mark);
	}

	return marks;
}
968+
969+
static void node_set_marks(struct xa_node *node, unsigned int offset,
970+
struct xa_node *child, unsigned int marks)
971+
{
972+
xa_mark_t mark = XA_MARK_0;
973+
974+
for (;;) {
975+
if (marks & (1 << (__force unsigned int)mark)) {
976+
node_set_mark(node, offset, mark);
977+
if (child)
978+
node_mark_all(child, mark);
979+
}
980+
if (mark == XA_MARK_MAX)
981+
break;
982+
mark_inc(mark);
983+
}
984+
}
985+
986+
/**
987+
* xas_split_alloc() - Allocate memory for splitting an entry.
988+
* @xas: XArray operation state.
989+
* @entry: New entry which will be stored in the array.
990+
* @order: New entry order.
991+
* @gfp: Memory allocation flags.
992+
*
993+
* This function should be called before calling xas_split().
994+
* If necessary, it will allocate new nodes (and fill them with @entry)
995+
* to prepare for the upcoming split of an entry of @order size into
996+
* entries of the order stored in the @xas.
997+
*
998+
* Context: May sleep if @gfp flags permit.
999+
*/
1000+
void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
1001+
gfp_t gfp)
1002+
{
1003+
unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
1004+
unsigned int mask = xas->xa_sibs;
1005+
1006+
/* XXX: no support for splitting really large entries yet */
1007+
if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
1008+
goto nomem;
1009+
if (xas->xa_shift + XA_CHUNK_SHIFT > order)
1010+
return;
1011+
1012+
do {
1013+
unsigned int i;
1014+
void *sibling;
1015+
struct xa_node *node;
1016+
1017+
node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
1018+
if (!node)
1019+
goto nomem;
1020+
node->array = xas->xa;
1021+
for (i = 0; i < XA_CHUNK_SIZE; i++) {
1022+
if ((i & mask) == 0) {
1023+
RCU_INIT_POINTER(node->slots[i], entry);
1024+
sibling = xa_mk_sibling(0);
1025+
} else {
1026+
RCU_INIT_POINTER(node->slots[i], sibling);
1027+
}
1028+
}
1029+
RCU_INIT_POINTER(node->parent, xas->xa_alloc);
1030+
xas->xa_alloc = node;
1031+
} while (sibs-- > 0);
1032+
1033+
return;
1034+
nomem:
1035+
xas_destroy(xas);
1036+
xas_set_err(xas, -ENOMEM);
1037+
}
1038+
EXPORT_SYMBOL_GPL(xas_split_alloc);
1039+
1040+
/**
1041+
* xas_split() - Split a multi-index entry into smaller entries.
1042+
* @xas: XArray operation state.
1043+
* @entry: New entry to store in the array.
1044+
* @order: New entry order.
1045+
*
1046+
* The value in the entry is copied to all the replacement entries.
1047+
*
1048+
* Context: Any context. The caller should hold the xa_lock.
1049+
*/
1050+
void xas_split(struct xa_state *xas, void *entry, unsigned int order)
1051+
{
1052+
unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
1053+
unsigned int offset, marks;
1054+
struct xa_node *node;
1055+
void *curr = xas_load(xas);
1056+
int values = 0;
1057+
1058+
node = xas->xa_node;
1059+
if (xas_top(node))
1060+
return;
1061+
1062+
marks = node_get_marks(node, xas->xa_offset);
1063+
1064+
offset = xas->xa_offset + sibs;
1065+
do {
1066+
if (xas->xa_shift < node->shift) {
1067+
struct xa_node *child = xas->xa_alloc;
1068+
1069+
xas->xa_alloc = rcu_dereference_raw(child->parent);
1070+
child->shift = node->shift - XA_CHUNK_SHIFT;
1071+
child->offset = offset;
1072+
child->count = XA_CHUNK_SIZE;
1073+
child->nr_values = xa_is_value(entry) ?
1074+
XA_CHUNK_SIZE : 0;
1075+
RCU_INIT_POINTER(child->parent, node);
1076+
node_set_marks(node, offset, child, marks);
1077+
rcu_assign_pointer(node->slots[offset],
1078+
xa_mk_node(child));
1079+
if (xa_is_value(curr))
1080+
values--;
1081+
} else {
1082+
unsigned int canon = offset - xas->xa_sibs;
1083+
1084+
node_set_marks(node, canon, NULL, marks);
1085+
rcu_assign_pointer(node->slots[canon], entry);
1086+
while (offset > canon)
1087+
rcu_assign_pointer(node->slots[offset--],
1088+
xa_mk_sibling(canon));
1089+
values += (xa_is_value(entry) - xa_is_value(curr)) *
1090+
(xas->xa_sibs + 1);
1091+
}
1092+
} while (offset-- > xas->xa_offset);
1093+
1094+
node->nr_values += values;
1095+
}
1096+
EXPORT_SYMBOL_GPL(xas_split);
1097+
#endif
1098+
9491099
/**
9501100
* xas_pause() - Pause a walk to drop a lock.
9511101
* @xas: XArray operation state.
@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
14071557
* @gfp: Memory allocation flags.
14081558
*
14091559
* After this function returns, loads from this index will return @entry.
1410-
* Storing into an existing multislot entry updates the entry of every index.
1560+
* Storing into an existing multi-index entry updates the entry of every index.
14111561
* The marks associated with @index are unaffected unless @entry is %NULL.
14121562
*
14131563
* Context: Any context. Takes and releases the xa_lock.
@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
15491699
*
15501700
* After this function returns, loads from any index between @first and @last,
15511701
* inclusive will return @entry.
1552-
* Storing into an existing multislot entry updates the entry of every index.
1702+
* Storing into an existing multi-index entry updates the entry of every index.
15531703
* The marks associated with @index are unaffected unless @entry is %NULL.
15541704
*
15551705
* Context: Process context. Takes and releases the xa_lock. May sleep

0 commit comments

Comments
 (0)