Skip to content

Commit

Permalink
fix: kubespan MSS clamping
Browse files Browse the repository at this point in the history
Clamp the TCP maximum segment size (MSS) of connections that go through
KubeSpan so that segments fit within the KubeSpan MTU.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
  • Loading branch information
sergelogvinov authored and smira committed Jan 17, 2023
1 parent 683b4cc commit 70d9428
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 24 deletions.
9 changes: 3 additions & 6 deletions internal/app/machined/pkg/controllers/kubespan/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,12 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
}
}

cfgSpec := cfg.(*kubespan.Config).TypedSpec()

if nfTablesMgr == nil {
nfTablesMgr = ctrl.NfTablesManagerFactory(constants.KubeSpanDefaultFirewallMark, constants.KubeSpanDefaultForceFirewallMark, constants.KubeSpanDefaultFirewallMask)
}

cfgSpec := cfg.(*kubespan.Config).TypedSpec()

localIdentity, err := r.Get(ctx, resource.NewMetadata(kubespan.NamespaceName, kubespan.IdentityType, kubespan.LocalIdentity, resource.VersionUndefined))
if err != nil {
if state.IsNotFoundError(err) {
Expand Down Expand Up @@ -416,9 +416,6 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
}

mtu := cfgSpec.MTU
if mtu == 0 {
mtu = constants.KubeSpanLinkMTU
}

for _, spec := range []network.RouteSpecSpec{
{
Expand Down Expand Up @@ -499,7 +496,7 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
return fmt.Errorf("error modifying link spec: %w", err)
}

if err = nfTablesMgr.Update(allowedIPsSet); err != nil {
if err = nfTablesMgr.Update(allowedIPsSet, mtu); err != nil {
return fmt.Errorf("failed updating nftables: %w", err)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ type mockNftablesManager struct {
ipSet *netipx.IPSet
}

func (mock *mockNftablesManager) Update(ipSet *netipx.IPSet) error {
func (mock *mockNftablesManager) Update(ipSet *netipx.IPSet, mtu uint32) error {
mock.mu.Lock()
defer mock.mu.Unlock()

Expand Down
183 changes: 171 additions & 12 deletions internal/app/machined/pkg/controllers/kubespan/nftables.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ import (
"github.com/google/nftables/binaryutil"
"github.com/google/nftables/expr"
"go4.org/netipx"
"golang.org/x/sys/unix"
)

// NfTablesManager manages nftables outside of controllers/resources scope.
type NfTablesManager interface {
Update(*netipx.IPSet) error
Update(ips *netipx.IPSet, mtu uint32) error
Cleanup() error
}

Expand Down Expand Up @@ -54,6 +55,7 @@ type nfTablesManager struct {
MarkMask uint32

currentSet *netipx.IPSet
currentMTU uint32

// nfTable is a handle for the KubeSpan root table
nfTable *nftables.Table
Expand All @@ -66,16 +68,17 @@ type nfTablesManager struct {
}

// Update the nftables rules based on the IPSet.
func (m *nfTablesManager) Update(desired *netipx.IPSet) error {
if m.currentSet != nil && m.currentSet.Equal(desired) {
func (m *nfTablesManager) Update(desired *netipx.IPSet, mtu uint32) error {
if m.currentSet != nil && m.currentSet.Equal(desired) && m.currentMTU == mtu {
return nil
}

if err := m.setNFTable(desired); err != nil {
if err := m.setNFTable(desired, mtu); err != nil {
return fmt.Errorf("failed to update IP sets: %w", err)
}

m.currentSet = desired
m.currentMTU = mtu

return nil
}
Expand Down Expand Up @@ -129,7 +132,7 @@ func (m *nfTablesManager) tableExists() (bool, error) {
return foundExisting, nil
}

func (m *nfTablesManager) setNFTable(ips *netipx.IPSet) error {
func (m *nfTablesManager) setNFTable(ips *netipx.IPSet, mtu uint32) error {
c := &nftables.Conn{}

// NB: sets should be flushed before new members because nftables will fail
Expand Down Expand Up @@ -175,6 +178,22 @@ func (m *nfTablesManager) setNFTable(ips *netipx.IPSet) error {
return fmt.Errorf("failed to add IPv6 set: %w", err)
}

// meta oifname "lo" accept
ruleLo := []expr.Any{
// [ meta load oifname => reg 1 ]
&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
// [ cmp eq reg 1 lo ]
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: ifname("lo"),
},
// Accept the packet to stop the ruleset processing
&expr.Verdict{
Kind: expr.VerdictAccept,
},
}

// meta mark & 0x00000060 == 0x00000020 accept
ruleExpr := []expr.Any{
// Load the firewall mark into register 1
Expand Down Expand Up @@ -219,6 +238,14 @@ func (m *nfTablesManager) setNFTable(ips *netipx.IPSet) error {
Exprs: ruleExpr,
})

// match lo interface
// accept and return without modifying the table or mark
c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: outChain,
Exprs: ruleLo,
})

c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: preChain,
Expand All @@ -231,12 +258,24 @@ func (m *nfTablesManager) setNFTable(ips *netipx.IPSet) error {
Exprs: matchIPv6Set(m.targetSet6, m.InternalMark, m.MarkMask),
})

c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: outChain,
Exprs: matchIPSetMSS(m.targetSet4, mtu, nftables.TableFamilyIPv4),
})

c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: outChain,
Exprs: matchIPv4Set(m.targetSet4, m.InternalMark, m.MarkMask),
})

c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: outChain,
Exprs: matchIPSetMSS(m.targetSet6, mtu, nftables.TableFamilyIPv6),
})

c.AddRule(&nftables.Rule{
Table: m.nfTable,
Chain: outChain,
Expand All @@ -258,17 +297,24 @@ func matchIPv6Set(set *nftables.Set, mark, mask uint32) []expr.Any {
return matchIPSet(set, mark, mask, nftables.TableFamilyIPv6)
}

func matchIPSet(set *nftables.Set, mark, mask uint32, family nftables.TableFamily) []expr.Any {
var (
offset uint32 = 16
length uint32 = 4
)

if family == nftables.TableFamilyIPv6 {
// ipOffsetLength returns where the destination address sits inside the
// network header for the given family: its byte offset from the start of the
// header and its length in bytes.
//
// Only IPv4 and IPv6 are supported; any other family is a programming error
// and panics.
func ipOffsetLength(family nftables.TableFamily) (offset uint32, length uint32) {
	switch family { //nolint:exhaustive
	case nftables.TableFamilyIPv4:
		// 4-byte destination address at byte 16 of the IPv4 header.
		return 16, 4
	case nftables.TableFamilyIPv6:
		// 16-byte destination address at byte 24 of the IPv6 header.
		return 24, 16
	default:
		panic("unexpected IP family")
	}
}

func matchIPSet(set *nftables.Set, mark, mask uint32, family nftables.TableFamily) []expr.Any {
offset, length := ipOffsetLength(family)

// ip daddr @kubespan_targets_ipv4 meta mark set meta mark & 0xffffffdf | 0x00000040 accept
return []expr.Any{
// Store protocol type to register 1
Expand Down Expand Up @@ -324,6 +370,119 @@ func matchIPSet(set *nftables.Set, mark, mask uint32, family nftables.TableFamil
}
}

// mssForMTU returns the TCP maximum segment size that fits a link of the given
// MTU once overhead bytes of IP and TCP headers are subtracted.
//
// The subtraction is done in 32 bits and the result is saturated to the 16-bit
// range of the TCP MSS option, so an MTU above 65535 cannot wrap around to a
// tiny (or zero) MSS. When the MTU leaves no room for any payload, the maximum
// value is returned as well: the clamp rule only rewrites options that are
// strictly greater than the MSS, so the maximum value makes it a no-op instead
// of rewriting the option to zero.
func mssForMTU(mtu, overhead uint32) uint16 {
	const maxMSS = 0xffff

	if mtu <= overhead || mtu-overhead > maxMSS {
		return maxMSS
	}

	return uint16(mtu - overhead)
}

// matchIPSetMSS builds the expressions of a rule which clamps the TCP MSS
// option of connection-opening packets destined to an address in set.
//
// The rule matches packets of the given IP family whose destination address is
// a member of set, which are TCP, which have SYN set and RST clear, and whose
// MSS option is greater than the MSS derived from mtu; for those packets the
// MSS option is overwritten with the derived value.
//
// The derived MSS is mtu minus the IP and TCP header overhead (40 bytes for
// IPv4, 60 bytes for IPv6), saturated to the 16-bit range of the option.
//
// Only IPv4 and IPv6 families are supported; any other family panics.
func matchIPSetMSS(set *nftables.Set, mtu uint32, family nftables.TableFamily) []expr.Any {
	offset, length := ipOffsetLength(family)

	var overhead uint32

	switch family { //nolint:exhaustive
	case nftables.TableFamilyIPv4:
		overhead = 40 // TCP + IPv4 overhead
	case nftables.TableFamilyIPv6:
		overhead = 60 // TCP + IPv6 overhead
	default:
		panic("unexpected IP family")
	}

	mss := mssForMTU(mtu, overhead)

	// ip daddr @kubespan_targets_ipv4 tcp flags & (syn|rst) == syn tcp option maxseg size > $MSS tcp option maxseg size set $MSS
	return []expr.Any{
		// Store protocol type to register 1
		&expr.Meta{
			Key:      expr.MetaKeyNFPROTO,
			Register: 1,
		},
		// Match IP Family
		&expr.Cmp{
			Op:       expr.CmpOpEq,
			Register: 1,
			Data:     []byte{byte(family)},
		},

		// Store the destination IP address to register 1
		&expr.Payload{
			DestRegister: 1,
			Base:         expr.PayloadBaseNetworkHeader,
			Offset:       offset,
			Len:          length,
		},
		// Match from target set
		&expr.Lookup{
			SourceRegister: 1,
			SetName:        set.Name,
			SetID:          set.ID,
		},

		// Load the layer 4 protocol number into register 1
		&expr.Meta{
			Key:      expr.MetaKeyL4PROTO,
			Register: 1,
		},
		// Match TCP only
		&expr.Cmp{
			Op:       expr.CmpOpEq,
			Register: 1,
			Data:     []byte{unix.IPPROTO_TCP},
		},

		// Load the TCP flags byte:
		// [ payload load 1b @ transport header + 13 => reg 1 ]
		&expr.Payload{
			DestRegister: 1,
			Base:         expr.PayloadBaseTransportHeader,
			Offset:       13,
			Len:          1,
		},
		// Keep only the SYN (0x02) and RST (0x04) bits:
		// [ bitwise reg 1 = ( reg 1 & 0x00000006 ) ^ 0x00000000 ]
		&expr.Bitwise{
			DestRegister:   1,
			SourceRegister: 1,
			Len:            1,
			Mask:           []byte{0x02 | 0x04},
			Xor:            []byte{0x00},
		},
		// Require SYN set and RST clear:
		// [ cmp eq reg 1 0x00000002 ]
		&expr.Cmp{
			Op:       expr.CmpOpEq,
			Register: 1,
			Data:     []byte{0x02},
		},

		// Load the 2-byte value of TCP option kind 2 (maximum segment size):
		// [ exthdr load tcpopt 2b @ 2 + 2 => reg 1 ]
		&expr.Exthdr{
			DestRegister: 1,
			Type:         2,
			Offset:       2,
			Len:          2,
			Op:           expr.ExthdrOpTcpopt,
		},
		// Continue only if the advertised MSS is larger than ours:
		// [ cmp gt reg 1 MSS ]
		&expr.Cmp{
			Op:       expr.CmpOpGt,
			Register: 1,
			Data:     binaryutil.BigEndian.PutUint16(mss),
		},
		// [ immediate reg 1 MSS ]
		&expr.Immediate{
			Register: 1,
			Data:     binaryutil.BigEndian.PutUint16(mss),
		},
		// Overwrite the MSS option value with ours:
		// [ exthdr write tcpopt reg 1 => 2b @ 2 + 2 ]
		&expr.Exthdr{
			SourceRegister: 1,
			Type:           2,
			Offset:         2,
			Len:            2,
			Op:             expr.ExthdrOpTcpopt,
		},
	}
}

// ifname encodes an interface name as a fixed 16-byte, zero-padded buffer.
//
// Names shorter than 16 bytes are padded with trailing zero bytes; names
// longer than 16 bytes are truncated to their first 16 bytes.
// NOTE(review): 16 presumably mirrors the kernel's IFNAMSIZ, which is the
// width the interface-name comparison operates on — confirm.
func ifname(name string) []byte {
	var padded [16]byte

	// copy accepts a string source directly and stops at the shorter length.
	copy(padded[:], name)

	return padded[:]
}

func (m *nfTablesManager) setElements(ips *netipx.IPSet) (setElements4, setElements6 []nftables.SetElement) {
if ips == nil {
return nil, nil
Expand Down
10 changes: 7 additions & 3 deletions internal/app/machined/pkg/controllers/kubespan/nftables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ import (

func TestNfTables(t *testing.T) {
// use a different mark to avoid conflicts with running kubespan
mgr := kubespan.NewNfTablesManager(constants.KubeSpanDefaultFirewallMark+10, constants.KubeSpanDefaultForceFirewallMark<<1, constants.KubeSpanDefaultFirewallMask<<1)
mgr := kubespan.NewNfTablesManager(
constants.KubeSpanDefaultFirewallMark<<1,
constants.KubeSpanDefaultForceFirewallMark<<1,
constants.KubeSpanDefaultFirewallMask<<1,
)

// cleanup should be fine if nothing is installed
assert.NoError(t, mgr.Cleanup())
Expand All @@ -32,14 +36,14 @@ func TestNfTables(t *testing.T) {
ipSet, err := builder.IPSet()
require.NoError(t, err)

assert.NoError(t, mgr.Update(ipSet))
assert.NoError(t, mgr.Update(ipSet, constants.KubeSpanLinkMTU))

builder.AddPrefix(netip.MustParsePrefix("10.0.0.0/8"))

ipSet, err = builder.IPSet()
require.NoError(t, err)

assert.NoError(t, mgr.Update(ipSet))
assert.NoError(t, mgr.Update(ipSet, constants.KubeSpanLinkMTU))

assert.NoError(t, mgr.Cleanup())
}
7 changes: 6 additions & 1 deletion pkg/machinery/config/types/v1alpha1/v1alpha1_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,12 @@ func (k *NetworkKubeSpan) AdvertiseKubernetesNetworks() bool {

// MTU implements the KubeSpan interface.
func (k *NetworkKubeSpan) MTU() uint32 {
return pointer.SafeDeref(k.KubeSpanMTU)
mtu := pointer.SafeDeref(k.KubeSpanMTU)
if mtu == 0 {
mtu = constants.KubeSpanLinkMTU
}

return mtu
}

// Filters implements the KubeSpan interface.
Expand Down
6 changes: 6 additions & 0 deletions pkg/machinery/config/types/v1alpha1/v1alpha1_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ func (c *Config) Validate(mode config.RuntimeMode, options ...config.ValidationO
warnings = append(warnings, warn...)
result = multierror.Append(result, err)
}

if c.Machine().Network().KubeSpan().Enabled() {
if c.Machine().Network().KubeSpan().MTU() < constants.KubeSpanLinkMinimumMTU {
result = multierror.Append(result, fmt.Errorf("kubespan link MTU must be at least %d", constants.KubeSpanLinkMinimumMTU))
}
}
}

if c.MachineConfig.MachineDisks != nil {
Expand Down
Loading

0 comments on commit 70d9428

Please sign in to comment.