Skip to content

Commit aebfce1

Browse files
ummakynesmehmetb0
authored andcommitted
netfilter: nft_set_hash: skip duplicated elements pending gc run
BugLink: https://bugs.launchpad.net/bugs/2095283 [ Upstream commit 7ffc7481153bbabf3332c6a19b289730c7e1edf5 ] rhashtable does not provide stable walk, duplicated elements are possible in case of resizing. I considered that checking for errors when calling rhashtable_walk_next() was sufficient to detect the resizing. However, rhashtable_walk_next() returns -EAGAIN only at the end of the iteration, which is too late, because a gc work containing duplicated elements could have been already scheduled for removal to the worker. Add a u32 gc worker sequence number per set, bump it on every workqueue run. Annotate gc worker sequence number on the expired element. Use it to skip those already seen in this gc workqueue run. Note that this new field is never reset in case gc transaction fails, so next gc worker run on the expired element overrides it. Wraparound of gc worker sequence number should not be an issue with stale gc worker sequence number in the element, that would just postpone the element removal in one gc run. Note that it is not possible to use flags to annotate that element is pending gc run to detect duplicates, given that gc transaction can be invalidated in case of update from the control plane, therefore, not allowing to clear such flag. On x86_64, pahole reports no changes in the size of nft_rhash_elem. Fixes: f6c383b ("netfilter: nf_tables: adapt set backend to use GC transaction API") Reported-by: Laurent Fasnacht <laurent.fasnacht@proton.ch> Tested-by: Laurent Fasnacht <laurent.fasnacht@proton.ch> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Koichiro Den <koichiro.den@canonical.com>
1 parent 273fc96 commit aebfce1

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

net/netfilter/nft_set_hash.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
struct nft_rhash {
2525
struct rhashtable ht;
2626
struct delayed_work gc_work;
27+
u32 wq_gc_seq;
2728
};
2829

2930
struct nft_rhash_elem {
3031
struct rhash_head node;
32+
u32 wq_gc_seq;
3133
struct nft_set_ext ext;
3234
};
3335

@@ -337,6 +339,10 @@ static void nft_rhash_gc(struct work_struct *work)
337339
if (!gc)
338340
goto done;
339341

342+
/* Elements never collected use a zero gc worker sequence number. */
343+
if (unlikely(++priv->wq_gc_seq == 0))
344+
priv->wq_gc_seq++;
345+
340346
rhashtable_walk_enter(&priv->ht, &hti);
341347
rhashtable_walk_start(&hti);
342348

@@ -354,6 +360,14 @@ static void nft_rhash_gc(struct work_struct *work)
354360
goto try_later;
355361
}
356362

363+
/* rhashtable walk is unstable, already seen in this gc run?
364+
* Then, skip this element. In case of (unlikely) sequence
365+
* wraparound and stale element wq_gc_seq, next gc run will
366+
* just find this expired element.
367+
*/
368+
if (he->wq_gc_seq == priv->wq_gc_seq)
369+
continue;
370+
357371
if (nft_set_elem_is_dead(&he->ext))
358372
goto dead_elem;
359373

@@ -370,6 +384,8 @@ static void nft_rhash_gc(struct work_struct *work)
370384
if (!gc)
371385
goto try_later;
372386

387+
/* annotate gc sequence for this attempt. */
388+
he->wq_gc_seq = priv->wq_gc_seq;
373389
nft_trans_gc_elem_add(gc, he);
374390
}
375391

0 commit comments

Comments
 (0)