Skip to content

Commit

Permalink
Add support for setting sysctls
Browse files Browse the repository at this point in the history
This patch will allow users to specify namespace specific "kernel parameters"
for running inside of a container.

Signed-off-by: Dan Walsh <dwalsh@redhat.com>
  • Loading branch information
rhatdan committed Apr 12, 2016
1 parent 7b5a684 commit 9caf7ae
Show file tree
Hide file tree
Showing 12 changed files with 157 additions and 0 deletions.
1 change: 1 addition & 0 deletions contrib/completion/bash/docker
Original file line number Diff line number Diff line change
Expand Up @@ -1671,6 +1671,7 @@ _docker_run() {
--shm-size
--stop-signal
--tmpfs
--sysctl
--ulimit
--user -u
--userns
Expand Down
1 change: 1 addition & 0 deletions contrib/completion/zsh/_docker
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ __docker_subcommand() {
"($help)--privileged[Give extended privileges to this container]"
"($help)--read-only[Mount the container's root filesystem as read only]"
"($help)*--security-opt=[Security options]:security option: "
"($help)*--sysctl=-[sysctl options]:sysctl: "
"($help -t --tty)"{-t,--tty}"[Allocate a pseudo-tty]"
"($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
"($help)--tmpfs[mount tmpfs]"
Expand Down
1 change: 1 addition & 0 deletions daemon/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, e
return nil, fmt.Errorf("linux runtime spec resources: %v", err)
}
s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
s.Linux.Sysctl = c.HostConfig.Sysctls
if err := setDevices(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec devices: %v", err)
}
Expand Down
1 change: 1 addition & 0 deletions docs/reference/api/docker_remote_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ This section lists each version from latest to oldest. Each listing includes a

[Docker Remote API v1.21](docker_remote_api_v1.21.md) documentation

* `POST /containers/create` and `POST /containers/(id)/start` allow you to configure kernel parameters (sysctls) for use in the container.
* `GET /volumes` lists volumes from all volume drivers.
* `POST /volumes/create` to create a volume.
* `GET /volumes/(name)` get low-level information about a volume.
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/api/docker_remote_api_v1.21.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Create a container
"RestartPolicy": { "Name": "", "MaximumRetryCount": 0 },
"NetworkMode": "bridge",
"Devices": [],
"Sysctls": { "net.ipv4.ip_forward": "1" },
"Ulimits": [{}],
"LogConfig": { "Type": "json-file", "Config": {} },
"SecurityOpt": [],
Expand Down Expand Up @@ -306,6 +307,10 @@ Json Parameters:
- **Devices** - A list of devices to add to the container specified as a JSON object in the
form
`{ "PathOnHost": "/dev/deviceName", "PathInContainer": "/dev/deviceName", "CgroupPermissions": "mrw"}`
- **Sysctls** - Kernel parameters (sysctls) to set in the container, specified as a JSON object of the form
`{ <name>: <value> }`, for example:
`{ "net.ipv4.ip_forward": "1" }`

- **Ulimits** - A list of ulimits to set in the container, specified as
`{ "Name": <name>, "Soft": <soft limit>, "Hard": <hard limit> }`, for example:
`Ulimits: { "Name": "nofile", "Soft": 1024, "Hard": 2048 }`
Expand Down Expand Up @@ -426,6 +431,9 @@ Return low-level information on the container `id`
"Type": "json-file"
},
"SecurityOpt": null,
"Sysctls": {
"net.ipv4.ip_forward": "1"
},
"VolumesFrom": null,
"Ulimits": [{}],
"VolumeDriver": ""
Expand Down
1 change: 1 addition & 0 deletions docs/reference/commandline/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Creates a new container.
--stop-signal="SIGTERM" Signal to stop a container
--shm-size=[] Size of `/dev/shm`. The format is `<number><unit>`. `number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.
--storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID
--userns="" Container user namespace
Expand Down
28 changes: 28 additions & 0 deletions docs/reference/commandline/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ parent = "smn_cli"
--sig-proxy=true Proxy received signals to the process
--stop-signal="SIGTERM" Signal to stop a container
--storage-opt=[] Set storage driver options per container
--sysctl=[] Configure namespaced kernel parameters at runtime
-t, --tty Allocate a pseudo-TTY
-u, --user="" Username or UID (format: <name|uid>[:<group|gid>])
--userns="" Container user namespace
Expand Down Expand Up @@ -620,3 +621,30 @@ If you have set the `--exec-opt isolation=hyperv` option on the Docker `daemon`,
$ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top
```

### Configure namespaced kernel parameters (sysctls) at runtime

The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:

$ docker run --sysctl net.ipv4.ip_forward=1 someimage


> **Note**: Not all sysctls are namespaced. Docker does not support changing sysctls
> inside of a container that also modify the host system. As the kernel
> evolves we expect to see more sysctls become namespaced.

#### Currently supported sysctls

`IPC Namespace`:

kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*

If you use the `--ipc=host` option these sysctls will not be allowed.

`Network Namespace`:
Sysctls beginning with net.*

If you use the `--net=host` option these sysctls will not be allowed.
32 changes: 32 additions & 0 deletions integration-cli/docker_cli_run_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main

import (
"bufio"
"encoding/json"
"fmt"
"io/ioutil"
"os"
Expand Down Expand Up @@ -747,6 +748,37 @@ func (s *DockerSuite) TestRunTmpfsMounts(c *check.C) {
}
}

// TestRunSysctls exercises the --sysctl flag of `docker run`.
//
// For each accepted setting it checks that the value is visible inside the
// container under /proc/sys and that `docker inspect` reports it in
// HostConfig.Sysctls. It then checks that running with kernel.foobar produces
// output containing "invalid value".
func (s *DockerSuite) TestRunSysctls(c *check.C) {
	testRequires(c, DaemonIsLinux)

	accepted := []struct {
		container string
		setting   string
		want      string
	}{
		{container: "test", setting: "net.ipv4.ip_forward=1", want: "1"},
		{container: "test1", setting: "net.ipv4.ip_forward=0", want: "0"},
	}

	// One map is shared by every decode, so each inspect result overwrites
	// the entry written by the previous one.
	decoded := make(map[string]string)
	for _, tc := range accepted {
		procValue, _ := dockerCmd(c, "run", "--sysctl", tc.setting, "--name", tc.container, "busybox", "cat", "/proc/sys/net/ipv4/ip_forward")
		c.Assert(strings.TrimSpace(procValue), check.Equals, tc.want)

		inspected := inspectFieldJSON(c, tc.container, "HostConfig.Sysctls")
		err := json.Unmarshal([]byte(inspected), &decoded)
		c.Assert(err, check.IsNil)
		c.Assert(decoded["net.ipv4.ip_forward"], check.Equals, tc.want)
	}

	// Only the command output is inspected here; the exit code and error
	// returned by runCommandWithOutput are deliberately discarded.
	rejected := exec.Command(dockerBinary, "run", "--sysctl", "kernel.foobar=1", "--name", "test2", "busybox", "cat", "/proc/sys/kernel/foobar")
	output, _, _ := runCommandWithOutput(rejected)
	if !strings.Contains(output, "invalid value") {
		c.Fatalf("expected --sysctl to fail, got %s", output)
	}
}

// TestRunSeccompProfileDenyUnshare checks that 'docker run --security-opt seccomp=/tmp/profile.json debian:jessie unshare' exits with operation not permitted.
func (s *DockerSuite) TestRunSeccompProfileDenyUnshare(c *check.C) {
testRequires(c, SameHostDaemon, seccompEnabled, NotArm, Apparmor)
Expand Down
16 changes: 16 additions & 0 deletions man/docker-create.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ docker-create - Create a new container
[**--storage-opt**[=*[]*]]
[**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]]
Expand Down Expand Up @@ -336,6 +337,21 @@ unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
**--stop-signal**=*SIGTERM*
Signal to stop a container. Default is SIGTERM.

**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime

IPC Namespace - current sysctls allowed:

kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*

Note: if you use the --ipc=host option, these sysctls will not be allowed.

Network Namespace - current sysctls allowed:
Sysctls beginning with net.*

Note: if you use the --net=host option, these sysctls will not be allowed.

**-t**, **--tty**=*true*|*false*
Allocate a pseudo-TTY. The default is *false*.

Expand Down
33 changes: 33 additions & 0 deletions man/docker-run.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ docker-run - Run a command in a new container
[**--stop-signal**[=*SIGNAL*]]
[**--shm-size**[=*[]*]]
[**--sig-proxy**[=*true*]]
[**--sysctl**[=*[]*]]
[**-t**|**--tty**]
[**--tmpfs**[=*[CONTAINER-DIR[:<OPTIONS>]*]]
[**-u**|**--user**[=*USER*]]
Expand Down Expand Up @@ -492,6 +493,21 @@ its root filesystem mounted as read only prohibiting any writes.
`number` must be greater than `0`. Unit is optional and can be `b` (bytes), `k` (kilobytes), `m`(megabytes), or `g` (gigabytes).
If you omit the unit, the system uses bytes. If you omit the size entirely, the system uses `64m`.

**--sysctl**=SYSCTL
Configure namespaced kernel parameters at runtime

IPC Namespace - current sysctls allowed:

kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced
Sysctls beginning with fs.mqueue.*

If you use the `--ipc=host` option these sysctls will not be allowed.

Network Namespace - current sysctls allowed:
Sysctls beginning with net.*

If you use the `--net=host` option these sysctls will not be allowed.

**--sig-proxy**=*true*|*false*
Proxy received signals to the process (non-TTY mode only). SIGCHLD, SIGSTOP, and SIGKILL are not proxied. The default is *true*.

Expand Down Expand Up @@ -955,6 +971,23 @@ $ docker run -d --isolation default busybox top
$ docker run -d --isolation hyperv busybox top
```

## Setting Namespaced Kernel Parameters (Sysctls)

The `--sysctl` option sets namespaced kernel parameters (sysctls) in the
container. For example, to turn on IP forwarding in the container's
network namespace, run this command:

$ docker run --sysctl net.ipv4.ip_forward=1 someimage

Note:

Not all sysctls are namespaced. Docker does not support changing sysctls
inside of a container that also modify the host system. As the kernel
evolves we expect to see more sysctls become namespaced.

See the definition of the `--sysctl` option above for the current list of
supported sysctls.

# HISTORY
April 2014, Originally compiled by William Henry (whenry at redhat dot com)
based on docker.com source material and internal work.
Expand Down
32 changes: 32 additions & 0 deletions opts/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,35 @@ func ValidateLabel(val string) (string, error) {
}
return val, nil
}

// ValidateSysctl validates a sysctl setting of the form name=value and
// returns it unchanged when the name is whitelisted.
//
// A name is accepted when it is one of the individually listed kernel.*
// parameters below, or when it starts with one of the accepted prefixes
// ("net." or "fs.mqueue."). The value part is not inspected.
//
// An error is returned when val contains no "=" separator (malformed input)
// or when the name is not whitelisted; in both cases the returned string is
// empty.
func ValidateSysctl(val string) (string, error) {
	validSysctlMap := map[string]bool{
		"kernel.msgmax":          true,
		"kernel.msgmnb":          true,
		"kernel.msgmni":          true,
		"kernel.sem":             true,
		"kernel.shmall":          true,
		"kernel.shmmax":          true,
		"kernel.shmmni":          true,
		"kernel.shm_rmid_forced": true,
	}
	validSysctlPrefixes := []string{
		"net.",
		"fs.mqueue.",
	}

	// Only the first "=" separates the name from the value, so any further
	// "=" characters belong to the value and need not be split out.
	arr := strings.SplitN(val, "=", 2)
	if len(arr) < 2 {
		// Report the real problem: without this, a whitelisted name given
		// without a value would be rejected as "not whitelisted".
		return "", fmt.Errorf("sysctl '%s' is malformed, expected name=value", val)
	}
	name := arr[0]

	if validSysctlMap[name] {
		return val, nil
	}
	for _, vp := range validSysctlPrefixes {
		if strings.HasPrefix(name, vp) {
			return val, nil
		}
	}
	return "", fmt.Errorf("sysctl '%s' is not whitelisted", val)
}
3 changes: 3 additions & 0 deletions runconfig/opts/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flDevices = opts.NewListOpts(ValidateDevice)

flUlimits = NewUlimitOpt(nil)
flSysctls = opts.NewMapOpts(nil, opts.ValidateSysctl)

flPublish = opts.NewListOpts(nil)
flExpose = opts.NewListOpts(nil)
Expand Down Expand Up @@ -127,6 +128,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
cmd.Var(&flSecurityOpt, []string{"-security-opt"}, "Security Options")
cmd.Var(&flStorageOpt, []string{"-storage-opt"}, "Set storage driver options per container")
cmd.Var(flUlimits, []string{"-ulimit"}, "Ulimit options")
cmd.Var(flSysctls, []string{"-sysctl"}, "Sysctl options")
cmd.Var(&flLoggingOpts, []string{"-log-opt"}, "Log driver options")

cmd.Require(flag.Min, 1)
Expand Down Expand Up @@ -430,6 +432,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
ShmSize: shmSize,
Resources: resources,
Tmpfs: tmpfs,
Sysctls: flSysctls.GetAll(),
}

// When allocating stdin in attached mode, close stdin at client disconnect
Expand Down

0 comments on commit 9caf7ae

Please sign in to comment.