Skip to content

Commit 2c865a8

Browse files
committed
netfilter: nf_tables: add rule blob layout
This patch adds a blob layout per chain to represent the ruleset in the packet datapath.

    size (unsigned long)
      struct nft_rule_dp
        struct nft_expr
        ...
      struct nft_rule_dp
        struct nft_expr
        ...
      struct nft_rule_dp (is_last=1)

The new structure nft_rule_dp represents the rule in a more compact way (smaller memory footprint) compared to the control-plane nft_rule structure.

The ruleset blob is a read-only data structure. The first field contains the blob size, then the rules containing expressions. There is a trailing rule which is used by the tracing infrastructure which is equivalent to the NULL rule marker in the previous representation. The blob size field does not include the size of this trailing rule marker.

The ruleset blob is generated from the commit path.

This patch reuses the infrastructure available since 0cbc06b ("netfilter: nf_tables: remove synchronize_rcu in commit phase") to build the array of rules per chain.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent 3b9e2ea commit 2c865a8

File tree

4 files changed

+147
-67
lines changed

4 files changed

+147
-67
lines changed

include/net/netfilter/nf_tables.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
974974

975975
#define NFT_CHAIN_POLICY_UNSET U8_MAX
976976

977+
/*
 * struct nft_rule_dp - compact per-rule header stored in a chain's rule blob
 *
 * @is_last: set to 1 on the trailing marker rule written by nft_last_rule(),
 *           0 on every real rule
 * @dlen:    total byte length of the expressions copied into @data; the
 *           field is 12 bits wide, so the commit path rejects rules whose
 *           expressions add up to 1 << 12 bytes or more
 * @handle:  rule handle copied from the control-plane rule, kept for tracing
 * @data:    the rule's expressions, aligned like struct nft_expr
 */
struct nft_rule_dp {
978+
u64 is_last:1,
979+
dlen:12,
980+
handle:42; /* for tracing */
981+
unsigned char data[]
982+
__attribute__((aligned(__alignof__(struct nft_expr))));
983+
};
984+
985+
/*
 * struct nft_rule_blob - per-chain ruleset representation for the datapath
 *
 * @size: number of bytes of rule data stored in @data; the trailing
 *        is_last marker rule is not counted
 * @data: back-to-back struct nft_rule_dp entries, each followed by its
 *        expressions, aligned like struct nft_rule_dp
 */
struct nft_rule_blob {
986+
unsigned long size;
987+
unsigned char data[]
988+
__attribute__((aligned(__alignof__(struct nft_rule_dp))));
989+
};
990+
977991
/**
978992
* struct nft_chain - nf_tables chain
979993
*
@@ -987,8 +1001,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
9871001
* @name: name of the chain
9881002
*/
9891003
struct nft_chain {
990-
struct nft_rule *__rcu *rules_gen_0;
991-
struct nft_rule *__rcu *rules_gen_1;
1004+
struct nft_rule_blob __rcu *blob_gen_0;
1005+
struct nft_rule_blob __rcu *blob_gen_1;
9921006
struct list_head rules;
9931007
struct list_head list;
9941008
struct rhlist_head rhlhead;
@@ -1003,7 +1017,7 @@ struct nft_chain {
10031017
u8 *udata;
10041018

10051019
/* Only used during control plane commit phase: */
1006-
struct nft_rule **rules_next;
1020+
struct nft_rule_blob *blob_next;
10071021
};
10081022

10091023
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
@@ -1321,7 +1335,7 @@ struct nft_traceinfo {
13211335
const struct nft_pktinfo *pkt;
13221336
const struct nft_base_chain *basechain;
13231337
const struct nft_chain *chain;
1324-
const struct nft_rule *rule;
1338+
const struct nft_rule_dp *rule;
13251339
const struct nft_verdict *verdict;
13261340
enum nft_trace_types type;
13271341
bool packet_dumped;

net/netfilter/nf_tables_api.c

Lines changed: 101 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans)
17471747

17481748
/*
 * Free the rule blobs owned by @chain.
 *
 * Both generation slots may point at the same blob, so the second slot is
 * only freed when it differs from the first. A pending blob_next is not
 * expected here (hence the one-time warning) but is freed anyway.
 */
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
17491749
{
1750-
struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
1751-
struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
1750+
struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0);
1751+
struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1);
17521752

17531753
if (g0 != g1)
17541754
kvfree(g1);
17551755
kvfree(g0);
17561756

17571757
/* should be NULL either via abort or via successful commit */
1758-
WARN_ON_ONCE(chain->rules_next);
1759-
kvfree(chain->rules_next);
1758+
WARN_ON_ONCE(chain->blob_next);
1759+
kvfree(chain->blob_next);
17601760
}
17611761

17621762
void nf_tables_chain_destroy(struct nft_ctx *ctx)
@@ -2002,23 +2002,39 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
20022002

20032003
/*
 * Bookkeeping used to free a retired rule blob via call_rcu(). Room for it
 * is reserved at the tail of every blob allocation, so retiring a blob needs
 * no extra allocation.
 *
 * @h:    RCU callback head handed to call_rcu()
 * @blob: start of the allocation that the RCU callback passes to kvfree()
 */
struct nft_rules_old {
20042004
struct rcu_head h;
2005-
struct nft_rule **start;
2005+
struct nft_rule_blob *blob;
20062006
};
20072007

2008-
static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
2009-
unsigned int alloc)
2008+
static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
20102009
{
2011-
if (alloc > INT_MAX)
2010+
struct nft_rule_dp *prule;
2011+
2012+
prule = (struct nft_rule_dp *)ptr;
2013+
prule->is_last = 1;
2014+
ptr += offsetof(struct nft_rule_dp, data);
2015+
/* blob size does not include the trailer rule */
2016+
}
2017+
2018+
static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
2019+
{
2020+
struct nft_rule_blob *blob;
2021+
2022+
/* size must include room for the last rule */
2023+
if (size < offsetof(struct nft_rule_dp, data))
2024+
return NULL;
2025+
2026+
size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
2027+
if (size > INT_MAX)
20122028
return NULL;
20132029

2014-
alloc += 1; /* NULL, ends rules */
2015-
if (sizeof(struct nft_rule *) > INT_MAX / alloc)
2030+
blob = kvmalloc(size, GFP_KERNEL);
2031+
if (!blob)
20162032
return NULL;
20172033

2018-
alloc *= sizeof(struct nft_rule *);
2019-
alloc += sizeof(struct nft_rules_old);
2034+
blob->size = 0;
2035+
nft_last_rule(blob, blob->data);
20202036

2021-
return kvmalloc(alloc, GFP_KERNEL);
2037+
return blob;
20222038
}
20232039

20242040
static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
@@ -2091,9 +2107,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
20912107
struct nft_stats __percpu *stats;
20922108
struct net *net = ctx->net;
20932109
char name[NFT_NAME_MAXLEN];
2110+
struct nft_rule_blob *blob;
20942111
struct nft_trans *trans;
20952112
struct nft_chain *chain;
2096-
struct nft_rule **rules;
2113+
unsigned int data_size;
20972114
int err;
20982115

20992116
if (table->use == UINT_MAX)
@@ -2178,15 +2195,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
21782195
chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
21792196
}
21802197

2181-
rules = nf_tables_chain_alloc_rules(chain, 0);
2182-
if (!rules) {
2198+
data_size = offsetof(struct nft_rule_dp, data); /* last rule */
2199+
blob = nf_tables_chain_alloc_rules(data_size);
2200+
if (!blob) {
21832201
err = -ENOMEM;
21842202
goto err_destroy_chain;
21852203
}
21862204

2187-
*rules = NULL;
2188-
rcu_assign_pointer(chain->rules_gen_0, rules);
2189-
rcu_assign_pointer(chain->rules_gen_1, rules);
2205+
RCU_INIT_POINTER(chain->blob_gen_0, blob);
2206+
RCU_INIT_POINTER(chain->blob_gen_1, blob);
21902207

21912208
err = nf_tables_register_hook(net, table, chain);
21922209
if (err < 0)
@@ -8241,32 +8258,72 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
82418258

82428259
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
82438260
{
8261+
const struct nft_expr *expr, *last;
8262+
unsigned int size, data_size;
8263+
void *data, *data_boundary;
8264+
struct nft_rule_dp *prule;
82448265
struct nft_rule *rule;
8245-
unsigned int alloc = 0;
82468266
int i;
82478267

82488268
/* already handled or inactive chain? */
8249-
if (chain->rules_next || !nft_is_active_next(net, chain))
8269+
if (chain->blob_next || !nft_is_active_next(net, chain))
82508270
return 0;
82518271

82528272
rule = list_entry(&chain->rules, struct nft_rule, list);
82538273
i = 0;
82548274

82558275
list_for_each_entry_continue(rule, &chain->rules, list) {
8256-
if (nft_is_active_next(net, rule))
8257-
alloc++;
8276+
if (nft_is_active_next(net, rule)) {
8277+
data_size += sizeof(*prule) + rule->dlen;
8278+
if (data_size > INT_MAX)
8279+
return -ENOMEM;
8280+
}
82588281
}
8282+
data_size += offsetof(struct nft_rule_dp, data); /* last rule */
82598283

8260-
chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
8261-
if (!chain->rules_next)
8284+
chain->blob_next = nf_tables_chain_alloc_rules(data_size);
8285+
if (!chain->blob_next)
82628286
return -ENOMEM;
82638287

8288+
data = (void *)chain->blob_next->data;
8289+
data_boundary = data + data_size;
8290+
size = 0;
8291+
82648292
list_for_each_entry_continue(rule, &chain->rules, list) {
8265-
if (nft_is_active_next(net, rule))
8266-
chain->rules_next[i++] = rule;
8293+
if (!nft_is_active_next(net, rule))
8294+
continue;
8295+
8296+
prule = (struct nft_rule_dp *)data;
8297+
data += offsetof(struct nft_rule_dp, data);
8298+
if (WARN_ON_ONCE(data > data_boundary))
8299+
return -ENOMEM;
8300+
8301+
nft_rule_for_each_expr(expr, last, rule) {
8302+
if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
8303+
return -ENOMEM;
8304+
8305+
memcpy(data + size, expr, expr->ops->size);
8306+
size += expr->ops->size;
8307+
}
8308+
if (WARN_ON_ONCE(size >= 1 << 12))
8309+
return -ENOMEM;
8310+
8311+
prule->handle = rule->handle;
8312+
prule->dlen = size;
8313+
prule->is_last = 0;
8314+
8315+
data += size;
8316+
size = 0;
8317+
chain->blob_next->size += (unsigned long)(data - (void *)prule);
82678318
}
82688319

8269-
chain->rules_next[i] = NULL;
8320+
prule = (struct nft_rule_dp *)data;
8321+
data += offsetof(struct nft_rule_dp, data);
8322+
if (WARN_ON_ONCE(data > data_boundary))
8323+
return -ENOMEM;
8324+
8325+
nft_last_rule(chain->blob_next, prule);
8326+
82708327
return 0;
82718328
}
82728329

@@ -8280,8 +8337,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
82808337

82818338
if (trans->msg_type == NFT_MSG_NEWRULE ||
82828339
trans->msg_type == NFT_MSG_DELRULE) {
8283-
kvfree(chain->rules_next);
8284-
chain->rules_next = NULL;
8340+
kvfree(chain->blob_next);
8341+
chain->blob_next = NULL;
82858342
}
82868343
}
82878344
}
@@ -8290,38 +8347,34 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
82908347
{
82918348
struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
82928349

8293-
kvfree(o->start);
8350+
kvfree(o->blob);
82948351
}
82958352

8296-
static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
8353+
static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob)
82978354
{
8298-
struct nft_rule **r = rules;
82998355
struct nft_rules_old *old;
83008356

8301-
while (*r)
8302-
r++;
8303-
8304-
r++; /* rcu_head is after end marker */
8305-
old = (void *) r;
8306-
old->start = rules;
8357+
/* rcu_head is after end marker */
8358+
old = (void *)blob + sizeof(*blob) + blob->size;
8359+
old->blob = blob;
83078360

83088361
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
83098362
}
83108363

83118364
static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
83128365
{
8313-
struct nft_rule **g0, **g1;
8366+
struct nft_rule_blob *g0, *g1;
83148367
bool next_genbit;
83158368

83168369
next_genbit = nft_gencursor_next(net);
83178370

8318-
g0 = rcu_dereference_protected(chain->rules_gen_0,
8371+
g0 = rcu_dereference_protected(chain->blob_gen_0,
83198372
lockdep_commit_lock_is_held(net));
8320-
g1 = rcu_dereference_protected(chain->rules_gen_1,
8373+
g1 = rcu_dereference_protected(chain->blob_gen_1,
83218374
lockdep_commit_lock_is_held(net));
83228375

83238376
/* No changes to this chain? */
8324-
if (chain->rules_next == NULL) {
8377+
if (chain->blob_next == NULL) {
83258378
/* chain had no change in last or next generation */
83268379
if (g0 == g1)
83278380
return;
@@ -8330,22 +8383,22 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
83308383
* one uses same rules as current generation.
83318384
*/
83328385
if (next_genbit) {
8333-
rcu_assign_pointer(chain->rules_gen_1, g0);
8386+
rcu_assign_pointer(chain->blob_gen_1, g0);
83348387
nf_tables_commit_chain_free_rules_old(g1);
83358388
} else {
8336-
rcu_assign_pointer(chain->rules_gen_0, g1);
8389+
rcu_assign_pointer(chain->blob_gen_0, g1);
83378390
nf_tables_commit_chain_free_rules_old(g0);
83388391
}
83398392

83408393
return;
83418394
}
83428395

83438396
if (next_genbit)
8344-
rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
8397+
rcu_assign_pointer(chain->blob_gen_1, chain->blob_next);
83458398
else
8346-
rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);
8399+
rcu_assign_pointer(chain->blob_gen_0, chain->blob_next);
83478400

8348-
chain->rules_next = NULL;
8401+
chain->blob_next = NULL;
83498402

83508403
if (g0 == g1)
83518404
return;

0 commit comments

Comments
 (0)