Skip to content

Commit

Permalink
datapath: Create sysctl rp_filter overwrite config on agent init
Browse files Browse the repository at this point in the history
SystemD versions 245 and greater will create a sysctl config which sets
the `rp_filter` value for all network interfaces to 1. This conflicts
with cilium which requires `rp_filter` to be 0 on interfaces it uses.

This commit adds a small utility, `sysctlfix`, which inserts a config
file with the highest priority into the `/etc/sysctl.d` dir. The file
contains directives to disable `rp_filter` and may hold other sysctl
config in the future.

This utility is called as an init container before the cilium agent
starts. Because the sysctl config is in place before the agent starts,
all interfaces created by the agent and matching the pattern in the
config file will have `rp_filter` disabled, even when SystemD >=245 is
installed.

Fixes: cilium#10645
Fixes: cilium#19909
Signed-off-by: Dylan Reimerink <dylan.reimerink@isovalent.com>
  • Loading branch information
dylandreimerink authored and kkourt committed Jun 17, 2022
1 parent 6510b76 commit 6432558
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ debug: all

include Makefile.defs

SUBDIRS_CILIUM_CONTAINER := proxylib envoy bpf cilium daemon cilium-health bugtool tools/mount
SUBDIRS_CILIUM_CONTAINER := proxylib envoy bpf cilium daemon cilium-health bugtool tools/mount tools/sysctlfix
SUBDIRS := $(SUBDIRS_CILIUM_CONTAINER) operator plugins tools hubble-relay

SUBDIRS_CILIUM_CONTAINER += plugins/cilium-cni
Expand Down
44 changes: 44 additions & 0 deletions install/kubernetes/cilium/templates/cilium-agent/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ spec:
container.apparmor.security.beta.kubernetes.io/cilium-agent: "unconfined"
container.apparmor.security.beta.kubernetes.io/clean-cilium-state: "unconfined"
container.apparmor.security.beta.kubernetes.io/mount-cgroup: "unconfined"
container.apparmor.security.beta.kubernetes.io/apply-sysctl-overwrites: "unconfined"
{{- end }}
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 8 }}
Expand Down Expand Up @@ -454,6 +455,49 @@ spec:
- SYS_PTRACE
{{- end}}
{{- end }}
# Container that installs the Cilium sysctl overrides: it copies the
# cilium-sysctlfix binary into /hostbin, runs it through nsenter inside the
# mount namespace referenced by /hostproc/1/ns/mnt (presumably the host's
# PID 1 - confirm against the hostproc volume definition), and then removes
# the copied binary again.
- name: apply-sysctl-overwrites
image: {{ include "cilium.image" .Values.image | quote }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
# Directory from which the copied binary is executed after nsenter.
# Presumably the host path backing the cni-path volume mounted at /hostbin
# below - verify against the volume definition.
- name: BIN_PATH
value: {{ .Values.cni.binPath }}
command:
- sh
# -e aborts the script at the first failing command, so a failing cp or
# nsenter also skips the final rm; -c runs the script given as next argument.
- -ec
# The statically linked Go program binary is invoked to avoid any
# dependency on utilities like sh that can be missing on certain
# distros installed on the underlying host. Copy the binary to the
# same directory where we install cilium cni plugin so that exec permissions
# are available.
- |
cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix;
nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix";
rm /hostbin/cilium-sysctlfix
volumeMounts:
# Provides the namespace file used by nsenter above.
- name: hostproc
mountPath: /hostproc
# Destination of the temporary cilium-sysctlfix copy.
- name: cni-path
mountPath: /hostbin
# Either fully privileged, or (when .Values.securityContext.privileged is
# unset) the spc_t SELinux type with all capabilities dropped except the
# ones explicitly added below.
securityContext:
{{- if .Values.securityContext.privileged }}
privileged: true
{{- else }}
seLinuxOptions:
level: 's0'
# Running with spc_t since we have removed the privileged mode.
# Users can change it to a different type as long as they have the
# type available on the system.
type: 'spc_t'
capabilities:
drop:
- ALL
add:
# Required in order to access host's /etc/sysctl.d dir
- SYS_ADMIN
# Used for nsenter
- SYS_CHROOT
- SYS_PTRACE
{{- end}}
{{- if not .Values.securityContext.privileged }}
# Mount the bpf fs if it is not mounted. We will perform this task
# from a privileged container because the mount propagation bidirectional
Expand Down
1 change: 1 addition & 0 deletions tools/sysctlfix/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cilium-sysctlfix
27 changes: 27 additions & 0 deletions tools/sysctlfix/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright Authors of Cilium
# SPDX-License-Identifier: Apache-2.0

# Build, clean and install rules for the cilium-sysctlfix tool.
# ECHO_GO, ECHO_CLEAN, QUIET, GO_BUILD, GO_CLEAN, INSTALL, DESTDIR and BINDIR
# are not defined in this file; presumably they come from Makefile.defs.
include ../../Makefile.defs

# Name of the binary produced in this directory.
TARGET := cilium-sysctlfix

# $(TARGET) is listed as phony, so its recipe runs on every invocation instead
# of being skipped when the binary already exists.
.PHONY: all $(TARGET) $(SUBDIRS) clean install

# Default goal: build the binary.
all: $(TARGET)

# Build the binary, writing it to a file named after the target ($@).
$(TARGET):
@$(ECHO_GO)
$(QUIET)$(GO_BUILD) -o $@

# Remove the built binary; the leading '-' lets make continue if rm fails.
clean:
@$(ECHO_CLEAN)
-$(QUIET)rm -f $(TARGET)
$(QUIET)$(GO_CLEAN)

# Create the destination bin directory, then install the binary with mode 0755.
install:
$(QUIET)$(INSTALL) -m 0755 -d $(DESTDIR)$(BINDIR)
$(QUIET)$(INSTALL) -m 0755 $(TARGET) $(DESTDIR)$(BINDIR)

# Alias for install.
install-binary: install

# Has no recipe and no prerequisites, so it does nothing for this tool.
install-bash-completion:
59 changes: 59 additions & 0 deletions tools/sysctlfix/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package main

import (
"fmt"
"os"
"path"
"strings"
)

// This tool attempts to write a sysctl config file to the sysctl config directory with the highest precedence so
// we can overwrite any other config and ensure correct sysctl options for Cilium to function.

// sysctlD is the sysctl drop-in directory in which the override file is placed.
const sysctlD = "/etc/sysctl.d/"

// ciliumOverwrites is the file name of the Cilium override config. The 99-zzz
// prefix ensures our config file gets precedence over most if not all other
// files.
const ciliumOverwrites = "99-zzz-override_cilium.conf"

// sysctlConfig is the full text of the override file: one comment line
// followed by one rp_filter directive per interface name pattern (lxc* and
// cilium_*), each line terminated by a newline.
var sysctlConfig = strings.Join(
	[]string{
		"# Disable rp_filter on Cilium interfaces since it may cause mangled packets to be dropped",
		"net.ipv4.conf.lxc*.rp_filter = 0",
		"net.ipv4.conf.cilium_*.rp_filter = 0",
	},
	"\n",
) + "\n"

// main writes the Cilium sysctl override config into the sysctl.d directory,
// creating the file if needed and truncating any previous content.
//
// This program is executed by an init container so we purposely don't
// exit with any error codes. In case of errors, the function will print warnings,
// but we don't block cilium agent pod from running.
func main() {
	info, err := os.Stat(sysctlD)
	if err != nil {
		fmt.Printf("can't stat sysctl.d dir '%s': %s\n", sysctlD, err)
		return
	}

	if !info.IsDir() {
		fmt.Printf("'%s' is not a directory\n", sysctlD)
		return
	}

	overwritesPath := path.Join(sysctlD, ciliumOverwrites)
	f, err := os.OpenFile(overwritesPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		fmt.Printf("unable to create cilium sysctl overwrites config: %s\n", err)
		return
	}

	if _, err := fmt.Fprint(f, sysctlConfig); err != nil {
		// The write error is the one worth reporting; closing here is
		// best-effort cleanup only.
		_ = f.Close()
		fmt.Printf("error while writing to sysctl config: %s\n", err)
		return
	}

	// Close explicitly instead of deferring: a failure at close time can mean
	// the data never reached the file, so success must not be reported then.
	if err := f.Close(); err != nil {
		fmt.Printf("error while closing sysctl config: %s\n", err)
		return
	}

	fmt.Println("sysctl config written")
}

0 comments on commit 6432558

Please sign in to comment.