Skip to content

Commit

Permalink
Merge branch 'net-fib_rules-add-dscp-mask-support'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
net: fib_rules: Add DSCP mask support

In some deployments users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port and the DSCP
field and use this information to route packets accordingly.

Redirecting traffic to a routing table based on specific bits in the
DSCP field is not currently possible. Only exact match is currently
supported by FIB rules.

This patchset extends FIB rules to match on the DSCP field with an
optional mask.

Patches #1-#5 gradually extend FIB rules to match on the DSCP field with
an optional mask.

Patch #6 adds test cases for the new functionality.

iproute2 support can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/fib_rule_mask_v1
====================

Link: https://patch.msgid.link/20250220080525.831924-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
kuba-moo committed Feb 22, 2025
2 parents e877009 + e818d1d commit 27422c3
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 5 deletions.
5 changes: 5 additions & 0 deletions Documentation/netlink/specs/rt_rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ attribute-sets:
name: dport-mask
type: u16
display-hint: hex
-
name: dscp-mask
type: u8
display-hint: hex

operations:
enum-model: directional
Expand Down Expand Up @@ -225,6 +229,7 @@ operations:
- flowlabel-mask
- sport-mask
- dport-mask
- dscp-mask
-
name: newrule-ntf
doc: Notify a rule creation
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/fib_rules.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ enum {
FRA_FLOWLABEL_MASK, /* flowlabel mask */
FRA_SPORT_MASK, /* sport mask */
FRA_DPORT_MASK, /* dport mask */
FRA_DSCP_MASK, /* dscp mask */
__FRA_MAX
};

Expand Down
1 change: 1 addition & 0 deletions net/core/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
[FRA_SPORT_MASK] = { .type = NLA_U16 },
[FRA_DPORT_MASK] = { .type = NLA_U16 },
[FRA_DSCP_MASK] = NLA_POLICY_MASK(NLA_U8, INET_DSCP_MASK >> 2),
};

int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
Expand Down
47 changes: 44 additions & 3 deletions net/ipv4/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
Expand Down Expand Up @@ -192,7 +193,8 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
* to mask the upper three DSCP bits prior to matching to maintain
* legacy behavior.
*/
if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
if (r->dscp_full &&
(r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask)
return 0;
else if (!r->dscp_full && r->dscp &&
!fib_dscp_masked_match(r->dscp, fl4))
Expand Down Expand Up @@ -235,11 +237,35 @@ static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
}

rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule4->dscp_full = true;

return 0;
}

/*
 * Parse a DSCP mask netlink attribute into an IPv4 FIB rule.
 *
 * The u8 payload of @nla is shifted left by two bits and converted with
 * inet_dsfield_to_dscp() before being stored in @rule4->dscp_mask.
 *
 * The mask is only accepted when @rule4 already has dscp_full set, and
 * when the rule's DSCP value has no bits set outside of the mask.
 *
 * Returns 0 on success. On failure, an error message is attached to
 * @extack, @rule4 is left unmodified and -EINVAL is returned.
 */
static int fib4_nl2rule_dscp_mask(const struct nlattr *nla,
				  struct fib4_rule *rule4,
				  struct netlink_ext_ack *extack)
{
	dscp_t mask;

	if (rule4->dscp_full) {
		mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);

		/* Every bit of the DSCP value must be covered by the mask. */
		if (!(rule4->dscp & ~mask)) {
			rule4->dscp_mask = mask;
			return 0;
		}

		NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
	} else {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Cannot specify DSCP mask without DSCP value");
	}

	return -EINVAL;
}

static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
Expand Down Expand Up @@ -271,6 +297,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
goto errout;

if (tb[FRA_DSCP_MASK] &&
fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0)
goto errout;

/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
Expand Down Expand Up @@ -366,6 +396,14 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}

if (tb[FRA_DSCP_MASK]) {
dscp_t dscp_mask;

dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask)
return 0;
}

#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
Expand All @@ -391,7 +429,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule4->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule4->dscp) >> 2))
inet_dscp_to_dsfield(rule4->dscp) >> 2) ||
nla_put_u8(skb, FRA_DSCP_MASK,
inet_dscp_to_dsfield(rule4->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
Expand All @@ -418,7 +458,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
+ nla_total_size(4) /* flow */
+ nla_total_size(1); /* dscp */
+ nla_total_size(1) /* dscp */
+ nla_total_size(1); /* dscp mask */
}

static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
Expand Down
45 changes: 43 additions & 2 deletions net/ipv6/fib6_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct fib6_rule {
__be32 flowlabel;
__be32 flowlabel_mask;
dscp_t dscp;
dscp_t dscp_mask;
u8 dscp_full:1; /* DSCP or TOS selector */
};

Expand Down Expand Up @@ -331,7 +332,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 0;
}

if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask)
return 0;

if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
Expand Down Expand Up @@ -360,11 +361,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
}

rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK);
rule6->dscp_full = true;

return 0;
}

/*
 * Parse a DSCP mask netlink attribute into an IPv6 FIB rule.
 *
 * The u8 payload of @nla is shifted left by two bits and converted with
 * inet_dsfield_to_dscp() before being stored in @rule6->dscp_mask.
 *
 * The mask is only accepted when @rule6 already has dscp_full set, and
 * when the rule's DSCP value has no bits set outside of the mask.
 *
 * Returns 0 on success. On failure, an error message is attached to
 * @extack, @rule6 is left unmodified and -EINVAL is returned.
 */
static int fib6_nl2rule_dscp_mask(const struct nlattr *nla,
				  struct fib6_rule *rule6,
				  struct netlink_ext_ack *extack)
{
	dscp_t mask;

	if (rule6->dscp_full) {
		mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);

		/* Every bit of the DSCP value must be covered by the mask. */
		if (!(rule6->dscp & ~mask)) {
			rule6->dscp_mask = mask;
			return 0;
		}

		NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask");
	} else {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Cannot specify DSCP mask without DSCP value");
	}

	return -EINVAL;
}

static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
struct netlink_ext_ack *extack)
{
Expand Down Expand Up @@ -409,10 +434,15 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
goto errout;
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
rule6->dscp_mask = frh->tos ? inet_dsfield_to_dscp(INET_DSCP_MASK) : 0;

if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;

if (tb[FRA_DSCP_MASK] &&
fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0)
goto errout;

if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
goto errout;
Expand Down Expand Up @@ -482,6 +512,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}

if (tb[FRA_DSCP_MASK]) {
dscp_t dscp_mask;

dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2);
if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask)
return 0;
}

if (tb[FRA_FLOWLABEL] &&
nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
return 0;
Expand Down Expand Up @@ -512,7 +550,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule6->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule6->dscp) >> 2))
inet_dscp_to_dsfield(rule6->dscp) >> 2) ||
nla_put_u8(skb, FRA_DSCP_MASK,
inet_dscp_to_dsfield(rule6->dscp_mask) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
Expand All @@ -539,6 +579,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
+ nla_total_size(1) /* dscp */
+ nla_total_size(1) /* dscp mask */
+ nla_total_size(4) /* flowlabel */
+ nla_total_size(4); /* flowlabel mask */
}
Expand Down
38 changes: 38 additions & 0 deletions tools/testing/selftests/net/fib_rule_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,25 @@ fib_rule6_test()
"iif dscp no redirect to table"
fi

ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
if [ $? -eq 0 ]; then
match="dscp 0x0f/0x0f"
tosmatch=$(printf 0x"%x" $((0x1f << 2)))
tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
getmatch="tos $tosmatch"
getnomatch="tos $tosnomatch"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp masked redirect to table" \
"dscp masked no redirect to table"

match="dscp 0x0f/0x0f"
getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi

fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
Expand Down Expand Up @@ -597,6 +616,25 @@ fib_rule4_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi

ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
if [ $? -eq 0 ]; then
match="dscp 0x0f/0x0f"
tosmatch=$(printf 0x"%x" $((0x1f << 2)))
tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
getmatch="tos $tosmatch"
getnomatch="tos $tosnomatch"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp masked redirect to table" \
"dscp masked no redirect to table"

match="dscp 0x0f/0x0f"
getmatch="from $SRC_IP iif $DEV tos $tosmatch"
getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi
}

fib_rule4_vrf_test()
Expand Down

0 comments on commit 27422c3

Please sign in to comment.